diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h --- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h +++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -14,6 +14,7 @@ #include "llvm/IR/PassManager.h" namespace llvm { +class TargetTransformInfo; /// Argument promotion pass. /// @@ -26,6 +27,17 @@ public: ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {} + /// Check if callers and the callee \p F agree how promoted arguments would be + /// passed. The ones that they do not agree on are eliminated from the sets but + /// the return value has to be observed as well. + static bool areFunctionArgsABICompatible( + const Function &F, const TargetTransformInfo &TTI, + SmallPtrSetImpl &ArgsToPromote, + SmallPtrSetImpl &ByValArgsToTransform); + + /// Checks if a type could have padding bytes. + static bool isDenselyPacked(Type *type, const DataLayout &DL); + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); }; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -104,6 +104,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" @@ -2198,6 +2199,45 @@ static const char ID; }; +/// An abstract interface for privatizability. +/// +/// A pointer is privatizable if it can be replaced by a new, private one. +/// Privatizing pointer reduces the use count, interaction between unrelated +/// code parts. 
+///
+/// In order for a pointer to be privatizable its value cannot be observed
+/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
+/// what values are necessary to make the private copy look like the original
+/// one, and the values we need can be loaded (=dereferenceable).
+struct AAPrivatizablePtr : public StateWrapper<BooleanState, AbstractAttribute>,
+                           public IRPosition {
+  AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {}
+
+  /// Returns true if pointer privatization is assumed to be possible.
+  bool isAssumedPrivatizablePtr() const { return getAssumed(); }
+
+  /// Returns true if pointer privatization is known to be possible.
+  bool isKnownPrivatizablePtr() const { return getKnown(); }
+
+  /// Return the type we can choose for a private copy of the underlying
+  /// value. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> getPrivatizableType() const = 0;
+
+  /// Return an IR position, see struct IRPosition.
+  ///
+  ///{
+  IRPosition &getIRPosition() { return *this; }
+  const IRPosition &getIRPosition() const { return *this; }
+  ///}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
+                                              Attributor &A);
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// An abstract interface for all memory related attributes.
 struct AAMemoryBehavior
     : public IRAttribute<
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -774,8 +774,7 @@
   return true;
 }
 
-/// Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
   // There is no size information, so be conservative.
if (!type->isSized()) return false; @@ -844,12 +843,14 @@ return false; } -static bool areFunctionArgsABICompatible( +bool ArgumentPromotionPass::areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl &ArgsToPromote, SmallPtrSetImpl &ByValArgsToTransform) { for (const Use &U : F.uses()) { CallSite CS(U.getUser()); + if (!CS) + return false; const Function *Caller = CS.getCaller(); const Function *Callee = CS.getCalledFunction(); if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || @@ -951,9 +952,9 @@ // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe, if the passed value is densely // packed or if we can prove the padding bytes are never accessed. - bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); + bool isSafeToPromote = PtrArg->hasByValAttr() && + (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || + !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast(AgTy)) { if (MaxElements > 0 && STy->getNumElements() > MaxElements) { @@ -1011,8 +1012,8 @@ if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote, - ByValArgsToTransform)) + if (!ArgumentPromotionPass::areFunctionArgsABICompatible( + *F, TTI, ArgsToPromote, ByValArgsToTransform)) return nullptr; return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -29,12 +29,15 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" +#include 
"llvm/IR/NoFolder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -197,6 +200,75 @@ U->set(&New); } +/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and +/// advanced by \p Offset bytes. To aid later analysis the method tries to build +/// getelement pointer instructions that traverse the natural type of \p Ptr if +/// possible. If that fails, the remaining offset is adjusted byte-wise, hence +/// through a cast to i8*. +/// +/// TODO: This could probably live somewhere more prominantly if it doesn't +/// already exist. +static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset, + IRBuilder &IRB, const DataLayout &DL) { + assert(Offset >= 0 && "Negative offset not supported yet!"); + LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset + << "-bytes as " << *ResTy << "\n"); + + // The initial type we are trying to traverse to get nice GEPs. + Type *Ty = Ptr->getType(); + + SmallVector Indices; + std::string GEPName = Ptr->getName(); + while (Offset) { + uint64_t Idx, Rem; + + if (auto *STy = dyn_cast(Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + if (int64_t(SL->getSizeInBytes()) < Offset) + break; + Idx = SL->getElementContainingOffset(Offset); + assert(Idx < STy->getNumElements() && "Offset calculation error!"); + Rem = Offset - SL->getElementOffset(Idx); + Ty = STy->getElementType(Idx); + } else if (auto *PTy = dyn_cast(Ty)) { + Ty = PTy->getElementType(); + if (!Ty->isSized()) + break; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + assert(ElementSize && "Expected type with size!"); + Idx = Offset / ElementSize; + Rem = Offset % ElementSize; + } else { + // Non-aggregate type, we cast and make byte-wise progress now. 
+ break; + } + + LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset + << " Idx: " << Idx << " Rem: " << Rem << "\n"); + + GEPName += "." + std::to_string(Idx); + Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx)); + Offset = Rem; + } + + // Create a GEP if we collected indices above. + if (Indices.size()) + Ptr = IRB.CreateGEP(Ptr, Indices, GEPName); + + // If an offset is left we use byte-wise adjustment. + if (Offset) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy()); + Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset), + GEPName + ".b" + Twine(Offset)); + } + + // Ensure the result has the requested type. + Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast"); + + LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); + return Ptr; +} + /// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls /// with the "returned" attribute. Once we cannot look through the value any @@ -4392,6 +4464,434 @@ } }; +/// ----------------------- Privatizable Pointers ------------------------------ +struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { + AAPrivatizablePtrImpl(const IRPosition &IRP) + : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {} + + ChangeStatus indicatePessimisticFixpoint() override { + AAPrivatizablePtr::indicatePessimisticFixpoint(); + PrivatizableType = nullptr; + return ChangeStatus::CHANGED; + } + + /// Identify the type we can chose for a private copy of the underlying + /// argument. None means it is not clear yet, nullptr means there is none. + virtual Optional identifyPrivatizableType(Attributor &A) = 0; + + /// Return a privatizable type that encloses both T0 and T1. + /// TODO: This is merely a stub for now as we should manage a mapping as well. 
+ Optional combineTypes(Optional T0, Optional T1) { + if (!T0.hasValue()) + return T1; + if (!T1.hasValue()) + return T0; + if (T0 == T1) + return T0; + return nullptr; + } + + Optional getPrivatizableType() const override { + return PrivatizableType; + } + + const std::string getAsStr() const override { + return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]"; + } + +protected: + Optional PrivatizableType; +}; + +// TODO: Do this for call site arguments (probably also other values) as well. + +struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { + AAPrivatizablePtrArgument(const IRPosition &IRP) + : AAPrivatizablePtrImpl(IRP) {} + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional identifyPrivatizableType(Attributor &A) override { + // If this is a byval argument and we know all the call sites (so we can + // rewrite them), there is no need to check them explicitly. + if (getIRPosition().hasAttr(Attribute::ByVal) && + A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this, + true)) + return getAssociatedValue().getType()->getPointerElementType(); + + Optional Ty; + unsigned ArgNo = getIRPosition().getArgNo(); + + // Make sure the associated call site argument has the same type at all call + // sites and it is an allocation we know is safe to privatize, for now that + // means we only allow alloca instructions. + // TODO: We can additionally analyze the accesses in the callee to create + // the type from that information instead. That is a little more + // involved and will be done in a follow up patch. + auto CallSiteCheck = [&](AbstractCallSite ACS) { + IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is one not + // associated (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + // Check that all call sites agree on a type. 
+      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+      Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
+        if (CSTy.hasValue() && CSTy.getValue())
+          CSTy.getValue()->print(dbgs());
+        else if (CSTy.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+      });
+
+      Ty = combineTypes(Ty, CSTy);
+
+      LLVM_DEBUG({
+        dbgs() << " : New Type: ";
+        if (Ty.hasValue() && Ty.getValue())
+          Ty.getValue()->print(dbgs());
+        else if (Ty.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+        dbgs() << "\n";
+      });
+
+      return !Ty.hasValue() || Ty.getValue();
+    };
+
+    if (!A.checkForAllCallSites(CallSiteCheck, *this, true))
+      return nullptr;
+    return Ty;
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    // Avoid arguments with padding for now.
+    if (!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
+                                                A.getInfoCache().getDL())) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    // Verify callee and caller agree on how the promoted argument would be
+    // passed.
+    // TODO: The use of the ArgumentPromotion interface here is ugly, we need a
+    // specialized form of TargetTransformInfo::areFunctionArgsABICompatible
+    // which doesn't require the arguments ArgumentPromotion wanted to pass.
+ Function &Fn = *getIRPosition().getAnchorScope(); + SmallPtrSet ArgsToPromote, Dummy; + ArgsToPromote.insert(getAssociatedArgument()); + const auto *TTI = + A.getInfoCache().getAnalysisResultForFunction(Fn); + if (!TTI || + !ArgumentPromotionPass::areFunctionArgsABICompatible( + Fn, *TTI, ArgsToPromote, Dummy) || + ArgsToPromote.empty()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// Given a type to private \p PrivType, collect the constituates (which are + /// used) in \p ReplacementTypes. + static void + identifyReplacementTypes(Type *PrivType, + SmallVectorImpl &ReplacementTypes) { + // TODO: For now we expand the privatization type to the fullest which can + // lead to dead arguments that need to be removed later. + assert(PrivType && "Expected privatizable type!"); + + // Traverse the type, extract constituate types on the outermost level. + if (auto *PrivStructType = dyn_cast(PrivType)) { + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) + ReplacementTypes.push_back(PrivStructType->getElementType(u)); + } else if (auto *PrivArrayType = dyn_cast(PrivType)) { + ReplacementTypes.append(PrivArrayType->getNumElements(), + PrivArrayType->getElementType()); + } else { + ReplacementTypes.push_back(PrivType); + } + } + + /// Initialize \p Base according to the type \p PrivType at position \p IP. + /// The values needed are taken from the arguments of \p F starting at + /// position \p ArgNo. + static void createInitialization(Type *PrivType, Value &Base, Function &F, + unsigned ArgNo, Instruction &IP) { + assert(PrivType && "Expected privatizable type!"); + + IRBuilder IRB(&IP); + const DataLayout &DL = F.getParent()->getDataLayout(); + + // Traverse the type, build GEPs and stores. 
+ if (auto *PrivStructType = dyn_cast(PrivType)) { + const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { + Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo(); + Value *Ptr = constructPointer( + PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL); + new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); + } + } else if (auto *PrivArrayType = dyn_cast(PrivType)) { + Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo(); + uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy); + for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { + Value *Ptr = + constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL); + new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); + } + } else { + new StoreInst(F.getArg(ArgNo), &Base, &IP); + } + } + + /// Extract values from \p Base according to the type \p PrivType at the + /// call position \p ACS. The values are appended to \p ReplacementValues. + void createReplacementValues(Type *PrivType, AbstractCallSite ACS, + Value *Base, + SmallVectorImpl &ReplacementValues) { + assert(Base && "Expected base value!"); + assert(PrivType && "Expected privatizable type!"); + Instruction *IP = ACS.getInstruction(); + + IRBuilder IRB(IP); + const DataLayout &DL = IP->getModule()->getDataLayout(); + + if (Base->getType()->getPointerElementType() != PrivType) + Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(), + "", ACS.getInstruction()); + + // Traverse the type, build GEPs and loads. 
+ if (auto *PrivStructType = dyn_cast(PrivType)) { + const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { + Type *PointeeTy = PrivStructType->getElementType(u); + Value *Ptr = + constructPointer(PointeeTy->getPointerTo(), Base, + PrivStructLayout->getElementOffset(u), IRB, DL); + ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP)); + } + } else if (auto *PrivArrayType = dyn_cast(PrivType)) { + Type *PointeeTy = PrivArrayType->getElementType(); + uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy); + Type *PointeePtrTy = PointeeTy->getPointerTo(); + for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { + Value *Ptr = + constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL); + ReplacementValues.push_back(new LoadInst(PointeePtrTy, Ptr, "", IP)); + } + } else { + ReplacementValues.push_back(new LoadInst(PrivType, Base, "", IP)); + } + } + + /// See AbstractAttribute::manifest(...) + ChangeStatus manifest(Attributor &A) override { + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + assert(PrivatizableType.getValue() && "Expected privatizable type!"); + + // Collect all tail calls in the function as we cannot allow new allocas to + // escape into tail recursion. + // TODO: Be smarter about new allocas escaping into tail calls. + SmallVector TailCalls; + if (!A.checkForAllInstructions( + [&](Instruction &I) { + CallInst &CI = cast(I); + if (CI.isTailCall()) + TailCalls.push_back(&CI); + return true; + }, + *this, {Instruction::Call})) + return ChangeStatus::UNCHANGED; + + Argument *Arg = getAssociatedArgument(); + + // Callback to repair the associated function. A new alloca is placed at the + // beginning and initialized with the values passed through arguments. The + // new alloca replaces the use of the old pointer argument. 
+ Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + Function &ReplacementFn, Function::arg_iterator ArgIt) { + BasicBlock &EntryBB = ReplacementFn.getEntryBlock(); + Instruction *IP = &*EntryBB.getFirstInsertionPt(); + auto *AI = new AllocaInst(PrivatizableType.getValue(), 0, + Arg->getName() + ".priv", IP); + createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, + ArgIt->getArgNo(), *IP); + Arg->replaceAllUsesWith(AI); + + for (CallInst *CI : TailCalls) + CI->setTailCall(false); + }; + + // Callback to repair a call site of the associated function. The elements + // of the privatizable type are loaded prior to the call and passed to the + // new function version. + Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + AbstractCallSite ACS, SmallVectorImpl &NewArgOperands) { + createReplacementValues( + PrivatizableType.getValue(), ACS, + ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), + NewArgOperands); + }; + + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + + // Register a rewrite of the argument. + if (A.registerFunctionSignatureRewrite( + *Arg, ReplacementTypes, std::move(FnRepairCB), std::move(ACSRepairCB))) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { + AAPrivatizablePtrFloating(const IRPosition &IRP) + : AAPrivatizablePtrImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + virtual void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. 
+ indicatePessimisticFixpoint(); + } + + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::" + "updateImpl will not be called"); + } + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional identifyPrivatizableType(Attributor &A) override { + Value *Obj = + GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL()); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n"); + return nullptr; + } + + if (auto *AI = dyn_cast(Obj)) + if (auto *CI = dyn_cast(AI->getArraySize())) + if (CI->isOne()) + return Obj->getType()->getPointerElementType(); + if (auto *Arg = dyn_cast(Obj)) { + auto &PrivArgAA = + A.getAAFor(*this, IRPosition::argument(*Arg)); + if (PrivArgAA.isAssumedPrivatizablePtr()) + return Obj->getType()->getPointerElementType(); + } + + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid " + "alloca nor privatizable argument: " + << *Obj << "!\n"); + return nullptr; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteArgument final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + PrivatizableType = identifyPrivatizableType(A); + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + if (!PrivatizableType.getValue()) + return indicatePessimisticFixpoint(); + + const IRPosition &IRP = getIRPosition(); + auto &NoCaptureAA = A.getAAFor(*this, IRP); + if (!NoCaptureAA.isAssumedNoCapture()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n"); + return indicatePessimisticFixpoint(); + } + + auto &NoAliasAA = A.getAAFor(*this, IRP); + if (!NoAliasAA.isAssumedNoAlias()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n"); + return indicatePessimisticFixpoint(); + } + + const auto &MemBehaviorAA = A.getAAFor(*this, IRP); + if (!MemBehaviorAA.isAssumedReadOnly()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteReturned final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating { + AAPrivatizablePtrReturned(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. 
+ indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); + } +}; + /// -------------------- Memory Behavior Attributes ---------------------------- /// Includes read-none, read-only, and write-only. /// ---------------------------------------------------------------------------- @@ -5864,6 +6364,9 @@ // Every argument with pointer type might be marked nofree. getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be privatizable (or promotable) + getOrCreateAAFor(ArgPos); } } @@ -6095,6 +6598,7 @@ const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; +const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; // Macro magic to create the static generator function for attributes that @@ -6198,6 +6702,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -3,9 +3,11 @@ define internal i32 @deref(i32* %x) nounwind { ; CHECK-LABEL: define {{[^@]+}}@deref -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* 
[[X_PRIV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4 ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -19,7 +21,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32 ; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[X_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]]) ; CHECK-NEXT: ret i32 [[TMP1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -27,7 +27,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP]]) +; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -45,9 +45,11 @@ define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; CHECK-LABEL: define {{[^@]+}}@promote_avx2 -; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture 
nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64> +; CHECK-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -65,7 +67,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -8,9 +8,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -28,7 +30,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -47,9 +50,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -67,7 +72,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -86,9 +92,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -106,7 +114,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -125,9 +134,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -145,7 +156,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -184,7 +196,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) 
[[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -223,7 +235,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -242,9 +254,11 @@ ; This should promote define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; CHECK-SAME: 
(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -262,7 +276,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -281,9 +296,11 @@ ; This should promote define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree 
nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -301,7 +318,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -6,13 +6,20 @@ ; Don't drop 'byval' on %X here. 
define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]], i32 [[I:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[I:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; CHECK-NEXT: store i32 0, i32* [[X]] +; CHECK-NEXT: store i32 0, i32* [[X_PRIV]] ; CHECK-NEXT: ret void ; entry: @@ -36,7 +43,12 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]], i32 zeroext 0) +; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; CHECK-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[X]] +; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -4,9 +4,13 @@ define internal i32 @test(i32* %X, i32* %Y) { ; CHECK-LABEL: define {{[^@]+}}@test -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) -; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 -; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[Y_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[Y_PRIV]] +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]] +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[Y_PRIV]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -18,10 +22,14 @@ define internal i32 @caller(i32* %B) { ; CHECK-LABEL: define {{[^@]+}}@caller -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV]] ; CHECK-NEXT: [[A:%.*]] = alloca i32 ; CHECK-NEXT: store i32 1, i32* [[A]], align 4 -; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_PRIV]] +; 
CHECK-NEXT: [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]]) ; CHECK-NEXT: ret i32 [[C]] ; %A = alloca i32 @@ -34,7 +42,8 @@ ; CHECK-LABEL: define {{[^@]+}}@callercaller() ; CHECK-NEXT: [[B:%.*]] = alloca i32 ; CHECK-NEXT: store i32 2, i32* [[B]], align 4 -; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[X]] ; %B = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -5,13 +5,20 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; 
CHECK-NEXT: store i32 0, i32* [[X]] +; CHECK-NEXT: store i32 0, i32* [[X_PRIV]] ; CHECK-NEXT: ret void ; entry: @@ -33,7 +40,12 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]]) +; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]] +; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -7,9 +7,14 @@ define internal void @f(%struct.ss* byval %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; 
CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 @@ -26,9 +31,14 @@ define internal void @g(%struct.ss* byval align 32 %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@g -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 @@ -51,8 +61,16 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[S]]) -; CHECK-NEXT: call void @g(%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[S]]) +; CHECK-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]] +; CHECK-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]] +; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]]) +; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* 
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]] +; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]] +; CHECK-NEXT: call void @g(i32 [[TMP2]], i64 [[TMP3]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -5,12 +5,14 @@ define internal i32 @callee(i1 %C, i32* %P) { ; CHECK-LABEL: define {{[^@]+}}@callee -; CHECK-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) +; CHECK-SAME: (i1 [[C:%.*]], i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[P_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[P_PRIV]] ; CHECK-NEXT: br i1 false, label [[T:%.*]], label [[F:%.*]] ; CHECK: T: ; CHECK-NEXT: unreachable ; CHECK: F: -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P_PRIV]], align 4 ; CHECK-NEXT: ret i32 [[X]] ; br i1 %C, label %T, label %F @@ -27,7 +29,8 @@ ; CHECK-LABEL: define {{[^@]+}}@foo() ; CHECK-NEXT: [[A:%.*]] = alloca i32 ; CHECK-NEXT: store i32 17, i32* [[A]], align 4 -; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll @@ -28,8 +28,13 @@ define internal i32 
@test_byval_used(%struct.pair* byval %P) { ; CHECK-LABEL: define {{[^@]+}}@test_byval_used -; CHECK-SAME: (%struct.pair* nocapture nofree nonnull readonly byval align 8 dereferenceable(4) [[P:%.*]]) -; CHECK-NEXT: [[C:%.*]] = bitcast %struct.pair* [[P]] to i32* +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[P_PRIV:%.*]] = alloca [[STRUCT_PAIR:%.*]] +; CHECK-NEXT: [[P_PRIV_CAST:%.*]] = bitcast %struct.pair* [[P_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[P_PRIV_CAST]] +; CHECK-NEXT: [[P_PRIV_0_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[P_PRIV_0_1]] +; CHECK-NEXT: [[C:%.*]] = bitcast %struct.pair* [[P_PRIV]] to i32* ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[C]], align 8 ; CHECK-NEXT: ret i32 [[V]] ; @@ -43,7 +48,11 @@ ; CHECK-SAME: (i32** nocapture readonly [[Y:%.*]], %struct.pair* nocapture nofree readonly [[P:%.*]]) ; CHECK-NEXT: call void @test(i32** nocapture readonly [[Y]]), !dbg !4 ; CHECK-NEXT: call void @test_byval(), !dbg !5 -; CHECK-NEXT: [[V:%.*]] = call i32 @test_byval_used(%struct.pair* nocapture nofree readonly [[P]]), !dbg !5 +; CHECK-NEXT: [[P_CAST:%.*]] = bitcast %struct.pair* [[P]] to i32*, !dbg !5 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_CAST]] +; CHECK-NEXT: [[P_0_1:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[P_0_1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @test_byval_used(i32 [[TMP1]], i32 [[TMP2]]), !dbg !5 ; CHECK-NEXT: ret i32 [[V]] ; call void @test(i32** %Y), !dbg !1 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -19,10 +19,15 @@ ; ATTRIBUTOR-NEXT: ret i32 [[R]] ; ; GLOBALOPT_ATTRIBUTOR-LABEL: define 
{{[^@]+}}@f -; GLOBALOPT_ATTRIBUTOR-SAME: (%struct.ss* noalias nocapture nofree nonnull readonly align 4 dereferenceable(8) [[S:%.*]]) unnamed_addr +; GLOBALOPT_ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) unnamed_addr ; GLOBALOPT_ATTRIBUTOR-NEXT: entry: -; GLOBALOPT_ATTRIBUTOR-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 -; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV_CAST:%.*]] = bitcast %struct.ss* [[S_PRIV]] to i32* +; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[S_PRIV_CAST]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1 +; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 [[TMP1]], i32* [[S_PRIV_0_1]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 0 +; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[F0]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[F1]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = add i32 [[A]], [[B]] @@ -55,7 +60,11 @@ ; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 1, i32* [[F0]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 2, i32* [[F1]], align 4 -; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = call fastcc i32 @f(%struct.ss* noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; GLOBALOPT_ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; GLOBALOPT_ATTRIBUTOR-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[S_0_1]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 [[TMP0]], i32 [[TMP1]]) ; GLOBALOPT_ATTRIBUTOR-NEXT: ret i32 [[R]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -8,7 +8,8 @@ ; CHECK-LABEL: define {{[^@]+}}@caller() ; CHECK-NEXT: [[X:%.*]] = alloca i32 ; CHECK-NEXT: store i32 42, i32* [[X]], align 4 -; CHECK-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull align 4 dereferenceable(4) [[X]]), !prof !0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]] +; CHECK-NEXT: call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0 ; CHECK-NEXT: ret void ; %x = alloca i32 @@ -19,8 +20,10 @@ define internal void @promote_i32_ptr(i32* %xp) { ; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4 +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[XP_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[XP_PRIV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4 ; CHECK-NEXT: call void @use_i32(i32 [[X]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -28,7 +28,7 @@ ; CHECK-LABEL: define {{[^@]+}}@f() ; CHECK-NEXT: [[R:%.*]] = alloca i32 ; CHECK-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } -; CHECK-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret align 4 dereferenceable(4) [[R]]) +; 
CHECK-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) ; CHECK-NEXT: ret void ; %r = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll @@ -10,8 +10,13 @@ define internal void @bar(%pair* byval %Data) { ; CHECK-LABEL: define {{[^@]+}}@bar -; CHECK-SAME: (%pair* byval [[DATA:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @foo(%pair* [[DATA]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]] +; CHECK-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]] +; CHECK-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]]) ; CHECK-NEXT: ret void ; tail call i8* @foo(%pair* %Data) @@ -21,7 +26,11 @@ define void @zed(%pair* byval %Data) { ; CHECK-LABEL: define {{[^@]+}}@zed ; CHECK-SAME: (%pair* nocapture readonly byval [[DATA:%.*]]) -; CHECK-NEXT: call void @bar(%pair* nocapture readonly byval [[DATA]]) +; CHECK-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]] +; CHECK-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]] +; CHECK-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]]) ; CHECK-NEXT: ret void ; call void @bar(%pair* byval %Data) diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll --- 
a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes ; FIXME: Add -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations below. ; This flag was removed because max iterations is 2 in most cases, but in windows it is 1. ; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-annotate-decl-cs < %s | FileCheck %s @@ -16,7 +16,8 @@ ; FIXME: The callee -> call site direction is not working yet. define void @t0_caller(i32* %a) { -; CHECK-LABEL: @t0_caller( +; CHECK-LABEL: define {{[^@]+}}@t0_caller +; CHECK-SAME: (i32* [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 ; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -24,7 +25,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -41,12 +42,13 @@ ; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! ; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { -; CHECK-LABEL: @t0_callback_callee( +; CHECK-LABEL: define {{[^@]+}}@t0_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 8 -; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C:%.*]], align 64 -; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A:%.*]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 99, i32* [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -61,5 +63,112 @@ declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...) +; Test 1 +; +; Similar to test 0 but with some additional annotations (noalias/nocapture) to make sure +; we deduce and propagate noalias and others properly.
+ +define void @t1_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t1_caller +; CHECK-SAME: (i32* noalias [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. 
+define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t1_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync + +declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + +; Test 2 +; +; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved. + +define void @t2_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t2_caller +; CHECK-SAME: (i32* noalias [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. +; +; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls. 
+define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t2_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture) + +declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true}