diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -6843,7 +6843,9 @@ param: 4, offset: [0, 5][, calls: ((Callee)[, (Callee)]*)]? where the first ``param`` is the number of the parameter it describes, -``offset`` is the known access range of the paramenter inside of the function. +``offset`` is the inclusive range of offsets from the pointer parameter to bytes +which can be accessed by the function. This range does not include accesses by +function calls from ``calls`` list. where each ``Callee`` decribes how parameter is forwared into other functions and looks like: @@ -6854,7 +6856,44 @@ The ``callee`` refers to the summary entry id of the callee, ``param`` is the number of the callee parameter which points into the callers parameter -with offset known to be inside of the ``offset`` range. +with offset known to be inside of the ``offset`` range. ``calls`` will be +consumed and removed by thin link stage to update ``Param::offset`` so it +covers all accesses possible by ``calls``. + +Pointer parameter without corresponding ``Param`` is considered unsafe and we +assume that access with any offset is possible. + +Example: + +If we have the following function: + +.. code-block:: text + + define i64 @foo(i64* %0, i32* %1, i8* %2, i8 %3) { + store i32* %1, i32** @x + %5 = getelementptr inbounds i8, i8* %2, i64 5 + %6 = load i8, i8* %5 + %7 = getelementptr inbounds i8, i8* %2, i8 %3 + tail call void @bar(i8 %3, i8* %7) + %8 = load i64, i64* %0 + ret i64 %8 + } + +We can expect the record like this: + +.. code-block:: text + + params: ((param: 0, offset: [0, 7]),(param: 2, offset: [5, 5], calls: ((callee: ^3, param: 1, offset: [-128, 127])))) + +The function may access just 8 bytes of the paramenter %0 . ``calls`` is empty, +so the parameter is either not used for function calls or ``offset`` already +covers all accesses from nested function calls. +Parameter %1 escapes, so access is unknown. +The function itself can access just a single byte of the parameter %2. Additional +access is possible inside of the ``@bar`` or ``^3``. The function adds signed +offset to the pointer and passes the result as the argument %1 into ``^3``. +This record itself does not tell us how ``^3`` will access the parameter. +Parameter %3 is not a pointer. .. _refs_summary: diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -45,6 +45,12 @@ void print(raw_ostream &O) const; /// Parameters use for a FunctionSummary. + /// Function collects access information of all pointer parameters. + /// Information includes a range of direct access of parameters by the + /// functions and all call sites accepting the parameter. + /// StackSafety assumes that missing parameter information means possibility + /// of access to the parameter with any offset, so we can correctly link + /// code without StackSafety information, e.g. non-ThinLTO. std::vector getParamAccesses() const; }; diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -553,8 +553,7 @@ unsigned AlwaysInline : 1; }; - /// Describes the uses of a parameter by the range of offsets accessed in the - /// function and all of the call targets it is passed to. + /// Describes the uses of a parameter by the function. struct ParamAccess { static constexpr uint32_t RangeWidth = 64; @@ -564,7 +563,7 @@ struct Call { uint64_t ParamNo = 0; GlobalValue::GUID Callee = 0; - ConstantRange Offsets{RangeWidth, true}; + ConstantRange Offsets{/*BitWidth=*/RangeWidth, /*isFullSet=*/true}; Call() = default; Call(uint64_t ParamNo, GlobalValue::GUID Callee, @@ -573,7 +572,15 @@ }; uint64_t ParamNo = 0; - ConstantRange Use{RangeWidth, true}; + /// The range contains byte offsets from the parameter pointer which + /// accessed by the function. In the per-module summary, it only includes + /// accesses made by the function instructions. In the combined summary, it + /// also includes accesses by nested function calls. + ConstantRange Use{/*BitWidth=*/RangeWidth, /*isFullSet=*/true}; + /// In the per-module summary, it summarizes the byte offset applied to each + /// pointer parameter before passing to each corresponding callee. + /// In the combined summary, it's empty and information is propagated by + /// inter-procedural analysis and applied to the Use field. std::vector Calls; ParamAccess() = default; diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -748,13 +748,16 @@ return *Info; } -// Converts a StackSafetyFunctionInfo to the relevant FunctionSummary -// constructor fields std::vector StackSafetyInfo::getParamAccesses() const { + // Implementation transforms internal representation of parameter information + // into FunctionSummary format. std::vector ParamAccesses; for (const auto &KV : getInfo().Info.Params) { auto &PS = KV.second; + // Parameter accessed by any or unknown offset, represented as FullSet by + // StackSafety, is handled as the parameter for which we have no + // StackSafety info at all. So drop it to reduce summary size. if (PS.Range.isFullSet()) continue; @@ -763,6 +766,10 @@ Param.Calls.reserve(PS.Calls.size()); for (auto &C : PS.Calls) { + // Parameter forwarded into another function by any or unknown offset + // will make ParamAccess::Range as FullSet anyway. So we can drop the + // entire parameter like we did above. + // TODO(vitalybuka): Return already filtered parameters from getInfo(). if (C.Offset.isFullSet()) { ParamAccesses.pop_back(); break;