Index: lib/Transforms/IPO/FunctionAttrs.cpp =================================================================== --- lib/Transforms/IPO/FunctionAttrs.cpp +++ lib/Transforms/IPO/FunctionAttrs.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" @@ -66,15 +67,27 @@ STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); -STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumReturned, "Number of arguments marked returned"); +STATISTIC(NumNoCaptureArg, "Number of arguments marked nocapture"); STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); +STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); +STATISTIC(NumNonNullArg, "Number of arguments marked nonnull"); +STATISTIC(NumAlignmentArg, "Number of arguments marked with alignment information"); +STATISTIC(NumDereferencableArg, "Number of arguments marked as dereferencable"); +STATISTIC(NumDereferencableOrNullArg, + "Number of arguments marked as dereferencable or null"); +STATISTIC(NumNoAliasArg, "Number of arguments marked noalias"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); +STATISTIC(NumFunctionsWithStaticCallSites, + "Number of functions with statically known call sites"); +STATISTIC(NumSCCsWithStaticCallSites, + "Number of SCCs with statically known call sites"); + // FIXME: This is disabled by default to avoid exposing security vulnerabilities // in C/C++ code compiled by clang: // http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html @@ -87,6 +100,10 @@ "disable-nounwind-inference", cl::Hidden, cl::desc("Stop inferring nounwind attribute during function-attrs pass")); +static cl::opt DisableCalleeArgumentAttrInference( + "disable-callee-arg-attr-inference", cl::Hidden, + cl::desc("Do not infer callee argument attributes from call sites")); + namespace { using SCCNodeSet = SmallSetVector; @@ -528,6 +545,700 @@ return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; } +/// Definition of parameter attributes we want to propagate from call sites +/// (direct and callback) to a callee. This macro construct allows us to +/// minimize explicit code duplication. The properties we define here are the +/// following: +/// - NAME: to identify the attribute. +/// - KIND: to interact with other parts of LLVM (Attribute::AttrKind). +/// - INIT: initial value associated with this attribute. +#define ABSTRACT_ARGUMENT_ATTRIBUTES() \ + ABSTRACT_ARGUMENT_ATTR_FUNC(NonNull, Attribute::NonNull, true) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(NoCapture, Attribute::NoCapture, true) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(ReadNone, Attribute::ReadNone, true) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(ReadOnly, Attribute::ReadOnly, true) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(WriteOnly, Attribute::WriteOnly, true) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(NoAlias, Attribute::NoAlias, true) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(Alignment, Attribute::Alignment, 0x40000000u) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(Dereferencable, Attribute::Dereferenceable, \ + 0x40000000u) \ + ABSTRACT_ARGUMENT_ATTR_FUNC(DereferencableOrNull, \ + Attribute::DereferenceableOrNull, 0x40000000u) + +/// Helper data structure to manage parameter attributes. +struct ArgumentAttributeSet { + +// Each attribute defined in the ABSTRACT_ARGUMENT_ATTRIBUTES macro above is +// expanded to a member variable ("a_" + NAME + "_val") that cannot be accessed +// directly but only through the generated getter and setter functions. +#define ABSTRACT_ARGUMENT_ATTR_FUNC(NAME, KIND, INIT) \ +private: \ + decltype(INIT) a_##NAME##_val; \ + \ +public: \ + decltype(INIT) get##NAME##Attr() const { return a_##NAME##_val; } \ + bool has##NAME##Attr() const { return a_##NAME##_val; } \ + void set##NAME##Attr(decltype(INIT) Val = INIT) { a_##NAME##_val = Val; } \ + void clear##NAME##Attr() { a_##NAME##_val = 0; } + + ABSTRACT_ARGUMENT_ATTRIBUTES() +#undef ABSTRACT_ARGUMENT_ATTR_FUNC + + // Flag to simplify the check for any attribute. It is set by the clear() + // function and when hasAny() returns false. + bool HasAny = true; + + // Check if any attribute is set. + bool hasAny() { + if (!HasAny) + return false; + + bool Any = false; +#define ABSTRACT_ARGUMENT_ATTR_FUNC(NAME, KIND, INIT) Any |= has##NAME##Attr(); + + ABSTRACT_ARGUMENT_ATTRIBUTES() +#undef ABSTRACT_ARGUMENT_ATTR_FUNC + return HasAny = Any; + } + + // Clear all attributes. + void clear() { + HasAny = false; +#define ABSTRACT_ARGUMENT_ATTR_FUNC(NAME, KIND, INIT) clear##NAME##Attr(); + + ABSTRACT_ARGUMENT_ATTRIBUTES() +#undef ABSTRACT_ARGUMENT_ATTR_FUNC + } +}; + +/// Helper data structure to avoid the use of call sites. This is desired as +/// we want to apply the same reasoning to callback call sites later on. +struct AbstractCallSite { + // The callee function. + Function *Callee; + + // The instruction that transfers control (indirectly) to the Callee. + Instruction *I; + + // Parameters passed to the Callee at the (callback) call site I. + SmallVector Params; + + AbstractCallSite(Function *Callee, Instruction *I) : Callee(Callee), I(I) {} +}; + +/// Wrapper around a function argument and the current abstract state expressed +/// as a set of argument attributes. Note that access to the members is direct +/// and the constructor will initialize the attribute set according to the given +/// attribute (Arg). +struct AbstractArgument { + // The argument this abstract argument represents. + Argument *Arg; + + // The set of attributes for the argument Arg. + ArgumentAttributeSet Attrs; + + // Simple constructor that initializes the attribute set according to the + // properties of @p Arg and its parent function. + AbstractArgument(Argument &Arg) : Arg(&Arg) { + +#define ABSTRACT_ARGUMENT_ATTR_FUNC(NAME, KIND, INIT) \ + if (!Arg.hasAttribute(KIND) && !Arg.getParent()->hasFnAttribute(KIND)) \ + Attrs.set##NAME##Attr(); \ + else \ + Attrs.clear##NAME##Attr(); + + ABSTRACT_ARGUMENT_ATTRIBUTES() +#undef ABSTRACT_ARGUMENT_ATTR_FUNC + + } +}; + +// Generation of helper functions that lookup the abstract state for an +// attribute given an argument @p Arg and a abstract state relation for the SCC. +// For all attributes we generate the function +// "get" + NAME + "AbstractArgumentValue" +// which returns the current value of the abstract argument associated with +// @p Arg, if there is any. If not, the boolean @p Found is set to false. +// +// For boolean attributes we additionally generate lookup functions +// "isValid" + NAME + "AbstractArgument" +// that only return true if the abstract value associated with @p Arg exists and +// the attribute in question is set. +// +#define ABSTRACT_ARGUMENT_ATTR_FUNC(NAME, KIND, INIT) \ + template \ + static decltype(INIT) LLVM_ATTRIBUTE_UNUSED \ + get##NAME##AbstractArgumentValue( \ + Argument *Arg, bool &Found, \ + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { \ + Found = false; \ + /* If the argument is null or its parent is not in the current SCC, \ + * return zero and indicate that the abstract attribute was not found */ \ + if (!Arg) \ + return 0; \ + auto It = FunctionPointerArgumentsMap.find(Arg->getParent()); \ + if (It == FunctionPointerArgumentsMap.end()) \ + return 0; \ + Found = true; \ + for (AbstractArgument & AA : It->second) \ + if (AA.Arg == Arg) \ + return AA.Attrs.get##NAME##Attr(); \ + llvm_unreachable("Argument was not mapped to an abstract argument!\n"); \ + } \ + \ + template ::value>> \ + static bool LLVM_ATTRIBUTE_UNUSED isValid##NAME##AbstractArgument( \ + Argument *Arg, \ + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { \ + bool Found; \ + return get##NAME##AbstractArgumentValue(Arg, Found, \ + FunctionPointerArgumentsMap); \ + } + +ABSTRACT_ARGUMENT_ATTRIBUTES() +#undef ABSTRACT_ARGUMENT_ATTR_FUNC + +/// Helper to determine the dereferencability of the parameter @p Param. +/// Only if @p AllowNull is set, the parameter @p Param is allowed to be null. +template +static uint64_t getParameterDereferencability( + Value *Param, const DataLayout &DL, bool AllowNull, + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { + + // If the parameter is an argument we first try to determine the + // value based on the abstract state that might be mapped to it. + bool Found; + uint64_t Value = 0; + if (Argument *ParamArg = dyn_cast(Param)) { + if (AllowNull) + Value = getDereferencableOrNullAbstractArgumentValue( + ParamArg, Found, FunctionPointerArgumentsMap); + else + Value = getDereferencableAbstractArgumentValue( + ParamArg, Found, FunctionPointerArgumentsMap); + } + + // If a value was set, we cannot do better. + if (Found) + return Value; + + // Otherwise, determine the dereferencability explicitly. + bool CanBeNull; + uint64_t DereferencableBytes = + Param->getPointerDereferenceableBytes(DL, CanBeNull); + if (!CanBeNull) + return DereferencableBytes; + return AllowNull ? DereferencableBytes : 0; +} + +/// Helper to determine the non-null property of the parameter @p Param. +/// To this end, we check the abstract state of @p Arg in the SCC as well as +/// the annotations of @p Param at the call sites. +template +static bool +isNonNullParameter(AbstractCallSite &ACS, Value *Param, + Argument *ParamObjectArg, + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { + if (!EnableNonnullArgPropagation) + return false; + + // First check if the parameter (Param) is actually an argument. If so, we + // check if it is marked as non-null or if the abstract state associated with + // that argument is. + if (ParamObjectArg && (ParamObjectArg->hasNonNullAttr() || + isValidNonNullAbstractArgument( + ParamObjectArg, FunctionPointerArgumentsMap))) + return true; + + // If the above check failed, we try to look at the call site. + CallSite CS(ACS.I); + if (!CS) + return false; + + // If a call site is found, we check if the first occurence of the parameter + // (Param) has a non-null attribute. + unsigned NumArgs = CS.getNumArgOperands(); + for (unsigned Idx = 0; Idx < NumArgs; Idx++) + if (CS.getArgument(Idx) == Param) + return CS.paramHasAttr(Idx, Attribute::NonNull); + + llvm_unreachable("Parameter not found in call site!"); +} + +/// Helper to determine if the parameter @p Param is not aliasing with anything +/// once assigned to the argument identified by the abstract argument @p AA. +template +static bool isValidNoAliasParameter( + AbstractArgument &AA, Value *Param, Argument *ParamObjectArg, + AbstractCallSite &ACS, AAResults &AAR, const DominatorTree &DT, + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { + + // If ParamObjectArg (the underlying object of Param) is marked as + // no-alias, Param is no-alias in the context of the callee. + if (ParamObjectArg && ParamObjectArg->hasNoAliasAttr()) + return true; + + // If the callee only accesses memory accesible through the arguments we + // can skip the function local and captured checks and simply proceed to + // the alias check between the parameters. + if (!AAR.onlyAccessesArgPointees( + AAR.getModRefBehavior(AA.Arg->getParent()))) { + + // Check if Param is function local in the caller context either explicitly + // or through the abstract state associated with it in the SCC. If not, we + // cannot conclude no-alias in the callee context. + bool IsFunctionLocal = isIdentifiedFunctionLocal(Param) || + isValidNoAliasAbstractArgument( + ParamObjectArg, FunctionPointerArgumentsMap); + if (!IsFunctionLocal) + return false; + + // We also conservatively require the parameter Param to be "uncaptured" in + // the caller up to the (callback) call. + bool IsCapturedPtr = + PointerMayBeCapturedBefore(Param, false, true, ACS.I, &DT) && + !isValidNoCaptureAbstractArgument(ParamObjectArg, + FunctionPointerArgumentsMap); + if (IsCapturedPtr) + return false; + } + + // If the callee only accesses memory through arguments, or the parameter + // (Param) is function local in the caller and not captured up to the call + // site, we still have to ensure there is no other pointer argument it (Param) + // could alias with. Only then the no-alias property holds in the callee. + unsigned NumArgs = ACS.Params.size(); + for (unsigned Idx = 0; Idx < NumArgs; Idx++) { + if (Idx == AA.Arg->getArgNo()) + continue; + if (!ACS.Params[Idx]) + continue; + if (!AAR.isNoAlias(Param, ACS.Params[Idx])) + return false; + } + + return true; +} + +/// Transformer function that removes all invalid attributes from the abstract +/// state of an argument in the SCC. It is executed for each abstract call site +/// @p ACS with the mapping from parameter @p Param in the caller to abstract +/// argument @p AA in the callee. +template +static bool removeInvalidAttributes( + Value *Param, AbstractArgument &AA, AbstractCallSite &ACS, + const DataLayout &DL, AAResults &AAR, const DominatorTree &DT, + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { + + // This should only be called if any attribute is set in the abstract state. + assert(AA.Attrs.hasAny() && "Cannot remove attributes since there are none!"); + + // If the parameter is not specified we clear all attributes. + if (!Param) { + AA.Attrs.clear(); + return true; + } + + // If a parameter was given we check all attributes that are currently set + // explicitly. + bool Changed = false; + + // TODO: Even though the "ABSTRACT_ARGUMENT_ATTR_FUNC" macro is not that nice + // it often helped to write more concise code. However, in the following + // we treat every attribute different and we should think about a + // cleaner way to do this. + if (AA.Attrs.hasAlignmentAttr()) { + uint64_t ParameterAlignment = Param->getPointerAlignment(DL); + + auto OldAlignment = AA.Attrs.getAlignmentAttr(); + if (ParameterAlignment == 0) { + AA.Attrs.clearAlignmentAttr(); + } else { + AA.Attrs.setAlignmentAttr(GreatestCommonDivisor64( + AA.Attrs.getAlignmentAttr(), ParameterAlignment)); + if (AA.Attrs.getAlignmentAttr() == 1) + AA.Attrs.clearAlignmentAttr(); + } + if (OldAlignment != AA.Attrs.getAlignmentAttr()) + Changed = true; + } + + if (AA.Attrs.hasDereferencableAttr()) { + uint64_t ParameterDereferencability = getParameterDereferencability( + Param, DL, /* AllowNull */ false, FunctionPointerArgumentsMap); + if (AA.Attrs.getDereferencableAttr() > ParameterDereferencability) { + AA.Attrs.setDereferencableAttr(ParameterDereferencability); + Changed = true; + } + } + + if (AA.Attrs.hasDereferencableOrNullAttr()) { + uint64_t ParameterDereferencabilityOrNull = getParameterDereferencability( + Param, DL, /* AllowNull */ true, FunctionPointerArgumentsMap); + if (AA.Attrs.getDereferencableOrNullAttr() > + ParameterDereferencabilityOrNull) { + AA.Attrs.setDereferencableOrNullAttr(ParameterDereferencabilityOrNull); + Changed = true; + } + } + + // Determine the function argument that is behind the parameter. Thus, this is + // only non-null if the parameter (Param) at the call site is actually a + // function argument of the calller. + Argument *ParamObjectArg = dyn_cast(GetUnderlyingObject(Param, DL)); + + if (AA.Attrs.hasNonNullAttr()) { + if (!isNonNullParameter(ACS, Param, ParamObjectArg, + FunctionPointerArgumentsMap)) { + AA.Attrs.clearNonNullAttr(); + Changed = true; + } + } + + if (AA.Attrs.hasNoAliasAttr() && + !isValidNoAliasParameter(AA, Param, ParamObjectArg, ACS, AAR, DT, + FunctionPointerArgumentsMap)) { + AA.Attrs.clearNoAliasAttr(); + Changed = true; + } + + if (AA.Attrs.hasNoCaptureAttr() && + (!ParamObjectArg || (!ParamObjectArg->hasNoCaptureAttr() && + !isValidNoCaptureAbstractArgument( + ParamObjectArg, FunctionPointerArgumentsMap)))) { + AA.Attrs.clearNoCaptureAttr(); + Changed = true; + } + + if (AA.Attrs.hasReadNoneAttr() && + (!ParamObjectArg || + (!ParamObjectArg->hasAttribute(Attribute::ReadNone) && + !AAR.doesNotAccessMemory(ParamObjectArg->getParent()) && + !isValidReadNoneAbstractArgument(ParamObjectArg, + FunctionPointerArgumentsMap)))) { + AA.Attrs.clearReadNoneAttr(); + Changed = true; + } + + if (AA.Attrs.hasReadOnlyAttr() && + (!ParamObjectArg || (!ParamObjectArg->hasAttribute(Attribute::ReadOnly) && + !AAR.onlyReadsMemory(ParamObjectArg->getParent()) && + !isValidReadOnlyAbstractArgument( + ParamObjectArg, FunctionPointerArgumentsMap)))) { + AA.Attrs.clearReadOnlyAttr(); + Changed = true; + } + + if (AA.Attrs.hasWriteOnlyAttr() && + (!ParamObjectArg || + (!ParamObjectArg->hasAttribute(Attribute::WriteOnly) && + !AAR.doesNotReadMemory( + AAR.getModRefBehavior(ParamObjectArg->getParent())) && + !isValidWriteOnlyAbstractArgument(ParamObjectArg, + FunctionPointerArgumentsMap)))) { + AA.Attrs.clearWriteOnlyAttr(); + Changed = true; + } + + return Changed; +} + +/// Helper to collect all direct and callback call sites of the functions in the +/// SCC (@p SCCNodes). To reduce the number of analysis calls later, we collect +/// all call sites of a function in @p FunctionCallSitesMap. The pointer +/// arguments of these functions are, together with an initial abstract state, +/// collected in the @p FunctionPointerArgumentsMap. +template +static void collectAllCallSites( + SCCNodesTy &SCCNodes, FunctionCallSitesMapTy &FunctionCallSitesMap, + FunctionPointerArgumentsMapTy &FunctionPointerArgumentsMap) { + + // Get the container type used for abstract arguments. + using AbstractArgumentsTy = typename std::remove_reference::type; + + for (Function *F : SCCNodes) { + // We derive attributes only for internal functions as we otherwise do not + // know all call sites. + if (!F->hasInternalLinkage()) { + // TODO: This is the point where we could think about deriving attributes + // for the call sites despite outside callers and, if they seem + // beneficial, we could duplicate the callee. + continue; + } + + // If a function has internal linkage, we collect all pointer arguments. + AbstractArgumentsTy AbstractArguments; + for (Argument &Arg : F->args()) + if (Arg.getType()->isPointerTy()) + AbstractArguments.emplace_back(Arg); + + // If no pointer arguments are present, the function does not need to be + // processed further. However, as with other potential problems checked for + // in the following, we do not give up on the SCC. + if (AbstractArguments.empty()) + continue; + + // In the next step we determine all direct and callback call sites for + // the function. To this end, we iterate over the uses and check if it is + // a direct call or potentially a callback call that we know about. + SmallVector FUses; + for (const Use &FUse : F->uses()) + FUses.push_back(&FUse); + + bool OnlyValidUsers = true; + while (!FUses.empty()) { + const Use *FUse = FUses.pop_back_val(); + + // Casts are ignored but the uses are processed. + if (ConstantExpr *CE = dyn_cast(FUse->get())) { + for (const Use &CEUse : CE->uses()) + FUses.push_back(&CEUse); + continue; + } + + Value *UserVal = FUse->getUser()->stripPointerCasts(); + if (UserVal == F) + continue; + + // We require a call site and a known called function for both, direct as + // well as callback calls. + CallSite CS(UserVal); + + // TODO: Allow a bit cast of the called value here. + Function *CSCallee = CS ? CS.getCalledFunction() : nullptr; + if (!CSCallee) { + OnlyValidUsers = false; + break; + } + + // Additionally, we require the caller code to be exactly what we will + // execute at runtime. + Function *Caller = CS.getCaller(); + if (!Caller->hasExactDefinition()) { + OnlyValidUsers = false; + break; + } + + // Next we determine the callback calls for this call site. + // TODO: The identification of callback calls is not part of the initial + // patch that derives callee attributes from call sites. + // Consequently, we assume no callback calls in the following. + + // First handle direct calls and then callback calls. They are + // distinguished through the called function of the call site. + if (CSCallee == F) { + + // For a direct call we additionally have to check that the callee is + // not also passed as an argument. We do this by checking that the use + // of the callee function (FUse) is actually the call site function + // argument. + if (!CS.isCallee(FUse)) { + OnlyValidUsers = false; + break; + } + + // If the call is direct and the function use is the called function in + // the call site, we create an abstract call site. It is defined by the + // called function (CSCallee), the instruction that invokes it + // (CS.getInstruction()), and the parameters passed to the pointer + // arguments. + auto &AbstractCallSites = FunctionCallSitesMap[Caller]; + AbstractCallSites.push_back( + AbstractCallSite(CSCallee, CS.getInstruction())); + AbstractCallSite &ACS = AbstractCallSites.back(); + + for (AbstractArgument &AA : AbstractArguments) + ACS.Params.push_back(CS.getArgOperand(AA.Arg->getArgNo())); + + // Direct call is done, no need to check for a callback call. + } else { + + // Callback calls are not yet supported. + OnlyValidUsers = false; + break; + + } + } + + // Only if all call sites of the function are valid, thus statically known, + // we can derive information from them. If not, we simply do not add the + // function to the FunctionPointerArgumentsMap. + if (OnlyValidUsers) + FunctionPointerArgumentsMap[F] = std::move(AbstractArguments); + } +} + +template +static bool deriveCalleeArgumentAttrs(ArrayRef SCCNodes, + AARGetterT &AARGetter, + DomTreeGetterTy &DomTreeGetter) { + DenseMap> FunctionCallSitesMap; + DenseMap> + FunctionPointerArgumentsMap; + + // First collect all call sites (direct and callback calls) for the functions + // in the SCC. + collectAllCallSites(SCCNodes, FunctionCallSitesMap, + FunctionPointerArgumentsMap); + + // If there was no internal function with pointer arguments and valid, thus + // statically known, call sites, we are done. + if (FunctionCallSitesMap.empty()) + return false; + + // Bookkeeping. + NumFunctionsWithStaticCallSites += FunctionCallSitesMap.size(); + NumSCCsWithStaticCallSites++; + + // We are also done if no argument has an abstract state with optimistically + // assumed attributes. To determine this, and to later speed up the fixpoint + // computation, we remove all mappings for functions without annotateable + // arguments. + for (auto &It : FunctionPointerArgumentsMap) { + unsigned NumArgsWithAttrs = 0; + for (AbstractArgument &AA : It.second) + NumArgsWithAttrs += AA.Attrs.hasAny(); + if (!NumArgsWithAttrs) + FunctionPointerArgumentsMap.erase(It.getFirst()); + } + + // Check again if we are now done. + if (FunctionCallSitesMap.empty()) + return false; + + const DataLayout &DL = SCCNodes.front()->getParent()->getDataLayout(); + + // TODO: MaxIterations is an arbitrary chosen threshold that should be + // revisited. It is also unclear if we even need it (in practise). + int MaxIterations = 6; + + // Since we have at least one function that could potentially be annotated, we + // perform a fixpoint iteration to determine which attributes we can actually + // manifest. + bool Fixpoint; + do { + Fixpoint = true; + + // We analyze one function at a time and iterate over all call sites + // _inside_ that function. This way we can use the analysis information + // for the function multiple times. + for (auto &It : FunctionCallSitesMap) { + Function &CallSiteFunction = *It.first; + auto &AbstractCallSites = It.second; + + AAResults &AAR = AARGetter(CallSiteFunction); + const DominatorTree &DT = DomTreeGetter(CallSiteFunction); + + // For each abstract call site in the function (CallSiteFunction) we first + // lookup the abstract argument states for the pointer arguments of the + // called function. + for (AbstractCallSite &ACS : AbstractCallSites) { + + // Check if the callee is still mapped to abstract arguments. They could + // have been removed if none of them had attributes left that we + // potentially could add to them. + const auto &FPAMIt = FunctionPointerArgumentsMap.find(ACS.Callee); + if (FPAMIt == FunctionPointerArgumentsMap.end()) + continue; + + // Since we have abstract arguments for the callee, we will now check if + // their attributes are still valid and remove the ones that are not. + auto &AbstractArguments = FPAMIt->getSecond(); + assert(!AbstractArguments.empty() && + "Abstract arguments were mapped but empty"); + + unsigned NumArguments = AbstractArguments.size(); + unsigned NumArgsWithAttrs = 0; + for (unsigned u = 0; u < NumArguments; u++) { + // If no attribute is set, there is nothing to remove. + if (!AbstractArguments[u].Attrs.hasAny()) + continue; + + // If attributes are set, we remove invalid ones. In case this changes + // the attributes, thus some were removed, we have not found a + // fixpoint yet. + Fixpoint &= !removeInvalidAttributes( + ACS.Params[u], AbstractArguments[u], ACS, DL, AAR, DT, + FunctionPointerArgumentsMap); + + // If there are no attributes set in the new state, we might be able + // to remove the called function (ACS.Callee) from the subsequent + // fixpoint iterations. Thus, we check if any argument is still set. + if (AbstractArguments[u].Attrs.hasAny()) + NumArgsWithAttrs++; + } + + // If no argument of the callee has attributes left, we eliminate the + // callee from subsequent iterations. + if (!NumArgsWithAttrs) + FunctionPointerArgumentsMap.erase(ACS.Callee); + } + } + } while (!Fixpoint && --MaxIterations > 0); + + // Check if a fixpoint was reached or the maximal number of iterations. In + // case of the latter, bail as we do not have a sound solution at this point. + if (!Fixpoint) + return false; + + // We also bail if no functions with annotatable arguments remain. + if (FunctionCallSitesMap.empty()) + return false; + + LLVMContext &Ctx = SCCNodes.front()->getContext(); + + // For the ones that are still left, we iterate over the pointer arguments, + // check the attributes that are set, and manifest them in the IR. + for (auto &It : FunctionPointerArgumentsMap) { + for (AbstractArgument &AA : It.second) { + + if (!AA.Attrs.hasAny()) + continue; + + // We avoid adding a dereferencableOrNull annotation if it is implied + // by a new dereferencable attribute. + if (AA.Attrs.getDereferencableAttr() >= + AA.Attrs.getDereferencableOrNullAttr()) + AA.Attrs.clearDereferencableOrNullAttr(); + + // We avoid alignment annotations that are implied by the data layout. + if (AA.Attrs.hasAlignmentAttr() && + AA.Arg->getPointerAlignment(DL) >= AA.Attrs.getAlignmentAttr()) + AA.Attrs.clearAlignmentAttr(); + + // Since read-none implies read-only and write-only we do only manifest + // the first. + if (AA.Attrs.hasReadNoneAttr()) { + AA.Attrs.clearReadOnlyAttr(); + AA.Attrs.clearWriteOnlyAttr(); + } + + // Helper to manifest attributes for the argument (AA.Arg). All attributes + // that are set in the abstract state (AA.Attrs) are materialized here. +#define ABSTRACT_ARGUMENT_ATTR_FUNC(NAME, KIND, INIT) \ + if (AA.Attrs.has##NAME##Attr()) { \ + if (std::is_same::value) \ + AA.Arg->addAttr(KIND); \ + else \ + AA.Arg->addAttr(Attribute::get(Ctx, KIND, AA.Attrs.get##NAME##Attr())); \ + Num##NAME##Arg++; \ + } + + ABSTRACT_ARGUMENT_ATTRIBUTES() +#undef ABSTRACT_ARGUMENT_ATTR_FUNC + + } + } + + return true; +} + +#undef ABSTRACT_ARGUMENT_ATTRIBUTES + /// Deduce returned attributes for the SCC. static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; @@ -646,7 +1357,7 @@ ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { A->addAttr(Attribute::NoCapture); - ++NumNoCapture; + ++NumNoCaptureArg; Changed = true; } } @@ -665,7 +1376,7 @@ if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. A->addAttr(Attribute::NoCapture); - ++NumNoCapture; + ++NumNoCaptureArg; Changed = true; } else { // If it's not trivially captured and not trivially not captured, @@ -716,7 +1427,7 @@ ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { Argument *A = ArgumentSCC[0]->Definition; A->addAttr(Attribute::NoCapture); - ++NumNoCapture; + ++NumNoCaptureArg; Changed = true; } continue; @@ -758,7 +1469,7 @@ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; A->addAttr(Attribute::NoCapture); - ++NumNoCapture; + ++NumNoCaptureArg; Changed = true; } @@ -1444,6 +2155,9 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + getAAResultsAnalysisUsage(AU); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -1492,7 +2206,10 @@ return setDoesNotRecurse(F); } -static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) { +template +static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG, + AARGetterT &&AARGetter, + DomTreeGetterTy &DomTreeGetter) { // We only have a post-order SCC traversal (because SCCs are inherently // discovered in post-order), so we accumulate them in a vector and then walk // it in reverse. This is simpler than using the RPO iterator infrastructure @@ -1500,20 +2217,44 @@ // graph. We can also cheat egregiously because we're primarily interested in // synthesizing norecurse and so we can only save the singular SCCs as SCCs // with multiple functions in them will clearly be recursive. - SmallVector Worklist; + // + // NOTE: The above reasoning is only true for the identification of + // non-recursive singleton SCCs. However, we also want to derive callee + // attributes in reverse post order. For this the SCCs do not need to be + // singletons nor is the "egregious cheat" still applicable. + // FIXME: Perform a real reverse post order traversal over the SCCs. + SmallVector, 16> Worklist; for (scc_iterator I = scc_begin(&CG); !I.isAtEnd(); ++I) { - if (I->size() != 1) + assert(I->size() > 0); + // If we do not propagate attributes to the callee we are only interested + // in singleton SCCs. + if (DisableCalleeArgumentAttrInference && I->size() > 1) continue; - Function *F = I->front()->getFunction(); - if (F && !F->isDeclaration() && !F->doesNotRecurse() && - F->hasInternalLinkage()) - Worklist.push_back(F); + // Skip singleton SCCs that do not have a definition. + if (I->size() == 1 && (!I->front()->getFunction() || + I->front()->getFunction()->isDeclaration())) + continue; + + SmallVector SCCNodes; + for (auto It : *I) { + Function *F = It->getFunction(); + assert(F && !F->isDeclaration() && + "Non singleton CGSCC should have a function definition."); + SCCNodes.push_back(F); + } + + Worklist.push_back(std::move(SCCNodes)); } bool Changed = false; - for (auto *F : llvm::reverse(Worklist)) - Changed |= addNoRecurseAttrsTopDown(*F); + for (auto &SCCNodes : llvm::reverse(Worklist)) { + if (SCCNodes.size() == 1 && !SCCNodes.front()->doesNotRecurse() && + SCCNodes.front()->hasInternalLinkage()) + Changed |= addNoRecurseAttrsTopDown(*SCCNodes.front()); + if (!DisableCalleeArgumentAttrInference) + Changed |= deriveCalleeArgumentAttrs(SCCNodes, AARGetter, DomTreeGetter); + } return Changed; } @@ -1524,14 +2265,31 @@ auto &CG = getAnalysis().getCallGraph(); - return deduceFunctionAttributeInRPO(M, CG); + auto DomTreeGetter = [&](Function &F) -> const DominatorTree & { + return getAnalysis(F).getDomTree(); + }; + + return deduceFunctionAttributeInRPO(M, CG, LegacyAARGetter(*this), + DomTreeGetter); } PreservedAnalyses ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { auto &CG = AM.getResult(M); - if (!deduceFunctionAttributeInRPO(M, CG)) + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + + // We pass a lambda into functions to wire them up to the analysis manager + // for getting function analyses. + auto AARGetter = [&](Function &F) -> AAResults & { + return FAM.getResult(F); + }; + auto DomTreeGetter = [&](Function &F) -> const DominatorTree & { + return FAM.getResult(F); + }; + + if (!deduceFunctionAttributeInRPO(M, CG, AARGetter, DomTreeGetter)) return PreservedAnalyses::all(); PreservedAnalyses PA; Index: test/Other/opt-O2-pipeline.ll =================================================================== --- test/Other/opt-O2-pipeline.ll +++ test/Other/opt-O2-pipeline.ll @@ -176,6 +176,7 @@ ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO +; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-NEXT: Global Variable Optimizer ; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-NEXT: Dead Global Elimination @@ -284,6 +285,9 @@ ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Constructio +; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction Index: test/Other/opt-O3-pipeline.ll =================================================================== --- test/Other/opt-O3-pipeline.ll +++ test/Other/opt-O3-pipeline.ll @@ -180,6 +180,7 @@ ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO +; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-NEXT: Global Variable Optimizer ; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-NEXT: Dead Global Elimination @@ -288,6 +289,9 @@ ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Constructio +; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction Index: test/Other/opt-Os-pipeline.ll =================================================================== --- test/Other/opt-Os-pipeline.ll +++ test/Other/opt-Os-pipeline.ll @@ -163,6 +163,7 @@ ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO +; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-NEXT: Global Variable Optimizer ; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-NEXT: Dead Global Elimination @@ -271,6 +272,9 @@ ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Pass Arguments: +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Constructio +; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction Index: test/Other/pass-pipelines.ll =================================================================== --- test/Other/pass-pipelines.ll +++ test/Other/pass-pipelines.ll @@ -62,6 +62,7 @@ ; CHECK-O2-NEXT: Deduce function attributes in RPO ; CHECK-O2-NOT: Manager ; Reduce the size of the IR ASAP after the inliner. +; CHECK-O2-NEXT: Unnamed pass: implement Pass::getPassName() ; CHECK-O2-NEXT: Global Variable Optimizer ; CHECK-O2: Dead Global Elimination ; Next is the late function pass pipeline. Index: test/Transforms/FunctionAttrs/internal_callee.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrs/internal_callee.ll @@ -0,0 +1,43 @@ +; RUN: opt -rpo-functionattrs -S < %s | FileCheck %s +; +; Check that we derive the noalias and dereferenceable_or_null attribute +; for the argument of the rec_internal function. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: noinline nounwind uwtable +define dso_local void @g(i32* noalias dereferenceable_or_null(4) %A) #0 { +entry: + call void @rec_internal(i32* nonnull %A) + ret void +} + +; Function Attrs: noinline nounwind uwtable +define dso_local void @h() #0 { +entry: + %A = alloca i32, align 4 + store i32 5, i32* %A, align 4 + call void @rec_internal(i32* nonnull %A) + ret void +} + +; CHECK: define internal void @rec_internal(i32* noalias dereferenceable_or_null(4) %A) +; +; Function Attrs: noinline nounwind uwtable +define internal void @rec_internal(i32* %A) #0 { +entry: + %0 = load i32, i32* %A, align 4 + %sub = add nsw i32 %0, -1 + store i32 %sub, i32* %A, align 4 + %tobool = icmp eq i32 %sub, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + call void @rec_internal(i32* %A) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +attributes #0 = { noinline nounwind uwtable } Index: test/Transforms/FunctionAttrs/internal_noalias.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrs/internal_noalias.ll @@ -0,0 +1,54 @@ +; RUN: opt -rpo-functionattrs -S < %s | FileCheck %s +; +; Check that we derive noalias attributes for the arguments of the +; noalias_args_argmem function from its call sites. For the noalias_args +; function we shall only do so for the second argument. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { +entry: + %call1 = call i32 @noalias_args(i32* %A, i32* %B) + %call2 = call i32 @noalias_args_argmem(i32* %A, i32* %B) + %add = add nsw i32 %call1, %call2 + ret i32 %add +} + +; CHECK: define internal i32 @noalias_args(i32* %A, i32* noalias %B) +; +; Function Attrs: noinline nounwind uwtable +define internal i32 @noalias_args(i32* %A, i32* %B) #0 { +entry: + %0 = load i32, i32* %A, align 4 + %1 = load i32, i32* %B, align 4 + %add = add nsw i32 %0, %1 + %call = call i32 @noalias_args_argmem(i32* %A, i32* %B) + %add2 = add nsw i32 %add, %call + ret i32 %add2 +} + +; CHECK: define internal i32 @noalias_args_argmem(i32* noalias %A, i32* noalias %B) +; +; Function Attrs: noinline nounwind uwtable +define internal i32 @noalias_args_argmem(i32* %A, i32* %B) #1 { +entry: + %0 = load i32, i32* %A, align 4 + %1 = load i32, i32* %B, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @visible_local(i32* %A) #0 { +entry: + %B = alloca i32, align 4 + store i32 5, i32* %B, align 4 + %call1 = call i32 @noalias_args(i32* %A, i32* nonnull %B) + %call2 = call i32 @noalias_args_argmem(i32* %A, i32* nonnull %B) + %add = add nsw i32 %call1, %call2 + ret i32 %add +} + +attributes #0 = { noinline nounwind uwtable } +attributes #1 = { argmemonly noinline nounwind uwtable }