Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -4333,13 +4333,13 @@ // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. - // Apply always_inline to all calls within flatten functions. + // Apply flatten to all calls within flatten functions. // FIXME: should this really take priority over __try, below? if (CurCodeDecl && CurCodeDecl->hasAttr() && !(TargetDecl && TargetDecl->hasAttr())) { Attrs = Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::AlwaysInline); + llvm::Attribute::Flatten); } // Disable inlining inside SEH __try blocks. Index: llvm/docs/BitCodeFormat.rst =================================================================== --- llvm/docs/BitCodeFormat.rst +++ llvm/docs/BitCodeFormat.rst @@ -1059,6 +1059,7 @@ * code 57: ``optforfuzzing`` * code 58: ``shadowcallstack`` * code 64: ``sanitize_memtag`` +* code 65: ``flatten`` .. note:: The ``allocsize`` attribute has a special encoding for its arguments. Its two Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -1425,6 +1425,9 @@ can prove that the function does not execute any convergent operations. Similarly, the optimizer may remove ``convergent`` on calls/invokes when it can prove that the call/invoke cannot call a convergent function. +``flatten`` + This attribute is similar to ``alwaysinline``, but applies recursively to + all inlined function calls. ``inaccessiblememonly`` This attribute indicates that the function may only access memory that is not accessible by the module being compiled. This is a weaker form Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -633,6 +633,7 @@ ATTR_KIND_NOFREE = 62, ATTR_KIND_NOSYNC = 63, ATTR_KIND_SANITIZE_MEMTAG = 64, + ATTR_KIND_FLATTEN = 65, }; enum ComdatSelectionKindCodes { Index: llvm/include/llvm/IR/Attributes.td =================================================================== --- llvm/include/llvm/IR/Attributes.td +++ llvm/include/llvm/IR/Attributes.td @@ -45,6 +45,9 @@ /// Pointer is either null or dereferenceable. def DereferenceableOrNull : EnumAttr<"dereferenceable_or_null">; +/// Like AlwaysInline, but applies recursively +def Flatten : EnumAttr<"flatten">; + /// Function may only access memory that is inaccessible from IR. def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly">; Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -2071,6 +2071,14 @@ if (Call.isNoInline()) return llvm::InlineCost::getNever("noinline call site attribute"); + // Inline call sites marked flatten only if not marked noinline + if (Call.hasFnAttr(Attribute::Flatten)) { + auto IsViable = isInlineViable(*Callee); + if (IsViable) + return llvm::InlineCost::getAlways("flatten attribute"); + return llvm::InlineCost::getNever(IsViable.message); + } + LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -640,6 +640,7 @@ KEYWORD(convergent); KEYWORD(dereferenceable); KEYWORD(dereferenceable_or_null); + KEYWORD(flatten); KEYWORD(inaccessiblememonly); KEYWORD(inaccessiblemem_or_argmemonly); KEYWORD(inlinehint); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -1271,6 +1271,7 @@ case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; + case lltok::kw_flatten: B.addAttribute(Attribute::Flatten); break; case lltok::kw_inaccessiblememonly: B.addAttribute(Attribute::InaccessibleMemOnly); break; case lltok::kw_inaccessiblemem_or_argmemonly: @@ -1653,6 +1654,7 @@ case lltok::kw_alwaysinline: case lltok::kw_argmemonly: case lltok::kw_builtin: + case lltok::kw_flatten: case lltok::kw_inlinehint: case lltok::kw_jumptable: case lltok::kw_minsize: @@ -1752,6 +1754,7 @@ case lltok::kw_argmemonly: case lltok::kw_builtin: case lltok::kw_cold: + case lltok::kw_flatten: case lltok::kw_inlinehint: case lltok::kw_jumptable: case lltok::kw_minsize: Index: llvm/lib/AsmParser/LLToken.h =================================================================== --- llvm/lib/AsmParser/LLToken.h +++ llvm/lib/AsmParser/LLToken.h @@ -186,6 +186,7 @@ kw_convergent, kw_dereferenceable, kw_dereferenceable_or_null, + kw_flatten, kw_inaccessiblememonly, kw_inaccessiblemem_or_argmemonly, kw_inlinehint, Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1296,6 +1296,9 @@ case Attribute::SanitizeMemTag: llvm_unreachable("sanitize_memtag attribute not supported in raw format"); break; + case Attribute::Flatten: + llvm_unreachable("flatten attribute not supported in raw format"); + break; } llvm_unreachable("Unsupported attribute type"); } @@ -1425,6 +1428,8 @@ return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; + case bitc::ATTR_KIND_FLATTEN: + return Attribute::Flatten; case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: return Attribute::InaccessibleMemOnly; case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -616,6 +616,8 @@ return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; + case Attribute::Flatten: + return bitc::ATTR_KIND_FLATTEN; case Attribute::InaccessibleMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; case Attribute::InaccessibleMemOrArgMemOnly: Index: llvm/lib/IR/Attributes.cpp =================================================================== --- llvm/lib/IR/Attributes.cpp +++ llvm/lib/IR/Attributes.cpp @@ -290,6 +290,8 @@ return "builtin"; if (hasAttribute(Attribute::Convergent)) return "convergent"; + if (hasAttribute(Attribute::Flatten)) + return "flatten"; if (hasAttribute(Attribute::SwiftError)) return "swifterror"; if (hasAttribute(Attribute::SwiftSelf)) Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -1543,6 +1543,7 @@ case Attribute::SpeculativeLoadHardening: case Attribute::Speculatable: case Attribute::StrictFP: + case Attribute::Flatten: return true; default: break; @@ -1647,6 +1648,12 @@ "'noinline and alwaysinline' are incompatible!", V); + Assert(!(Attrs.hasAttribute(Attribute::NoInline) && + Attrs.hasAttribute(Attribute::Flatten)), + "Attributes " + "'noinline and flatten' are incompatible!", + V); + if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) { Assert(Attrs.getByValType() == cast(Ty)->getElementType(), "Attribute 'byval' type does not match parameter!", V); @@ -1799,6 +1806,10 @@ Attrs.hasFnAttribute(Attribute::AlwaysInline)), "Attributes 'noinline and alwaysinline' are incompatible!", V); + Assert(!(Attrs.hasFnAttribute(Attribute::NoInline) && + Attrs.hasFnAttribute(Attribute::Flatten)), + "Attributes 'noinline and flatten' are incompatible!", V); + if (Attrs.hasFnAttribute(Attribute::OptimizeNone)) { Assert(Attrs.hasFnAttribute(Attribute::NoInline), "Attribute 'optnone' requires 'noinline'!", V); Index: llvm/lib/Target/AMDGPU/AMDGPUInline.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -188,7 +188,8 @@ if (!TTI.areInlineCompatible(Caller, Callee)) return llvm::InlineCost::getNever("incompatible"); - if (CS.hasFnAttr(Attribute::AlwaysInline)) { + if (CS.hasFnAttr(Attribute::AlwaysInline) || + CS.hasFnAttr(Attribute::Flatten)) { auto IsViable = isInlineViable(*Callee); if (IsViable) return llvm::InlineCost::getAlways("alwaysinline viable"); Index: llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -2088,7 +2088,8 @@ // Don't generate memmove if this function will be inlined. This is // because the caller will undergo this transformation after inlining. Function *Func = CurLoop->getHeader()->getParent(); - if (Func->hasFnAttribute(Attribute::AlwaysInline)) + if (Func->hasFnAttribute(Attribute::AlwaysInline) || + Func->hasFnAttribute(Attribute::Flatten)) goto CleanupAndExit; // In case of a memmove, the call to memmove will be executed instead Index: llvm/lib/Transforms/IPO/AlwaysInliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -46,7 +46,9 @@ bool Changed = false; SmallVector InlinedFunctions; for (Function &F : M) - if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && + if (!F.isDeclaration() && + (F.hasFnAttribute(Attribute::AlwaysInline) || + F.hasFnAttribute(Attribute::Flatten) ) && isInlineViable(F)) { Calls.clear(); @@ -162,7 +164,8 @@ if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); - if (!CS.hasFnAttr(Attribute::AlwaysInline)) + if (!CS.hasFnAttr(Attribute::AlwaysInline) && + !CS.hasFnAttr(Attribute::Flatten)) return InlineCost::getNever("no alwaysinline attribute"); auto IsViable = isInlineViable(*Callee); Index: llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp =================================================================== --- llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -30,6 +30,7 @@ .Case("builtin", Attribute::Builtin) .Case("cold", Attribute::Cold) .Case("convergent", Attribute::Convergent) + .Case("flatten", Attribute::Flatten) .Case("inlinehint", Attribute::InlineHint) .Case("jumptable", Attribute::JumpTable) .Case("minsize", Attribute::MinSize) Index: llvm/lib/Transforms/IPO/HotColdSplitting.cpp =================================================================== --- llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -201,6 +201,8 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { if (F.hasFnAttribute(Attribute::AlwaysInline)) return false; + if (F.hasFnAttribute(Attribute::Flatten)) + return false; if (F.hasFnAttribute(Attribute::NoInline)) return false; Index: llvm/lib/Transforms/IPO/Inliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/Inliner.cpp +++ llvm/lib/Transforms/IPO/Inliner.cpp @@ -681,6 +681,8 @@ DebugLoc DLoc = CS->getDebugLoc(); BasicBlock *Block = CS.getParent(); + bool Flatten = CS.hasFnAttr(Attribute::Flatten); + // Attempt to inline the function. using namespace ore; @@ -709,8 +711,13 @@ int NewHistoryID = InlineHistory.size(); InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); - for (Value *Ptr : InlineInfo.InlinedCalls) - CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); + for (Value *Ptr : InlineInfo.InlinedCalls) { + CallSite NewCS(Ptr); + // propagate the flatten attribute to all inlined call sites + if (Flatten) + NewCS.addAttribute(AttributeList::FunctionIndex, Attribute::Flatten); + CallSites.push_back(std::make_pair(NewCS, NewHistoryID)); + } } } @@ -810,7 +817,9 @@ // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. - if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) + if (AlwaysInlineOnly && + !F->hasFnAttribute(Attribute::AlwaysInline) && + !F->hasFnAttribute(Attribute::Flatten)) continue; // If the only remaining users of the function are dead constants, remove @@ -1073,6 +1082,8 @@ DebugLoc DLoc = CS->getDebugLoc(); BasicBlock *Block = CS.getParent(); + bool Flatten = CS.hasFnAttr(Attribute::Flatten); + using namespace ore; InlineResult IR = InlineFunction(CS, IFI); @@ -1098,8 +1109,12 @@ InlineHistory.push_back({&Callee, InlineHistoryID}); for (CallSite &CS : reverse(IFI.InlinedCallSites)) if (Function *NewCallee = CS.getCalledFunction()) - if (!NewCallee->isDeclaration()) + if (!NewCallee->isDeclaration()) { + // propagate the flatten attribute to all inlined call sites + if (Flatten) + CS.addAttribute(AttributeList::FunctionIndex, Attribute::Flatten); Calls.push_back({CS, NewHistoryID}); + } } if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) Index: llvm/lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- llvm/lib/Transforms/IPO/PartialInlining.cpp +++ llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1262,6 +1262,9 @@ if (F->hasFnAttribute(Attribute::AlwaysInline)) return {false, nullptr}; + if (F->hasFnAttribute(Attribute::Flatten)) + return {false, nullptr}; + if (F->hasFnAttribute(Attribute::NoInline)) return {false, nullptr}; Index: llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp =================================================================== --- llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp +++ llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -77,6 +77,7 @@ if (F.isDeclaration()) continue; if (F.hasFnAttribute(Attribute::AlwaysInline) || + F.hasFnAttribute(Attribute::Flatten) || F.hasFnAttribute(Attribute::InlineHint)) { // Use a higher value for inline functions to account for the fact that // these are usually beneficial to inline. Index: llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -697,7 +697,8 @@ // have its address taken. Doing so would create an undefined external ref to // the function, which would fail to link. if (HasAvailableExternallyLinkage && - F->hasFnAttribute(Attribute::AlwaysInline)) + (F->hasFnAttribute(Attribute::AlwaysInline) || + F->hasFnAttribute(Attribute::Flatten))) return false; // Prohibit function address recording if the function is both internal and Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -888,6 +888,7 @@ // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: case Attribute::Cold: + case Attribute::Flatten: case Attribute::NoRecurse: case Attribute::InlineHint: case Attribute::MinSize: Index: llvm/test/Transforms/Inline/flatten.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/flatten.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +declare void @ext() + +define void @should_inline2() { + call void @ext() + ret void +} + +define void @should_inline() { + call void @should_inline2() + ret void +} + +define void @should_not_inline() noinline { + call void @ext() + ret void +} + +; CHECK: @flattened() { +define void @flattened() { +; CHECK-NEXT; @ext() #1 + call void @should_inline() flatten +; CHECK-NEXT; @should_not_inline() #1 + call void @should_not_inline() flatten + ret void +} + +; CHECK: attributes #1 = { flatten }