diff --git a/llvm/include/llvm/Transforms/Coroutines.h b/llvm/include/llvm/Transforms/Coroutines.h --- a/llvm/include/llvm/Transforms/Coroutines.h +++ b/llvm/include/llvm/Transforms/Coroutines.h @@ -13,25 +13,37 @@ namespace llvm { +// CoroEarly pass marks every function that has coro.begin with a string +// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine +// twice. First, it lets it go through complete IPO optimization pipeline as a +// single function. It forces restart of the pipeline by inserting an indirect +// call to an empty function "coro.devirt.trigger" which is devirtualized by +// CoroElide pass that triggers a restart of the pipeline by CGPassManager. +// When CoroSplit pass sees the same coroutine the second time, it splits it up, +// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. +#define CORO_PRESPLIT_ATTR "coroutine.presplit" +#define UNPREPARED_FOR_SPLIT "0" +#define PREPARED_FOR_SPLIT "1" + class Pass; class PassManagerBuilder; /// Add all coroutine passes to appropriate extension points. void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder); /// Lower coroutine intrinsics that are not needed by later passes. Pass *createCoroEarlyLegacyPass(); /// Split up coroutines into multiple functions driving their state machines. Pass *createCoroSplitLegacyPass(); /// Analyze coroutines use sites, devirtualize resume/destroy calls and elide /// heap allocation for coroutine frame where possible. Pass *createCoroElideLegacyPass(); /// Lower all remaining coroutine intrinsics. Pass *createCoroCleanupLegacyPass(); } #endif diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -42,81 +42,82 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Coroutines.h" using namespace llvm; #define DEBUG_TYPE "inline-cost" STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); static cl::opt DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Default amount of inlining to perform")); static cl::opt PrintInstructionComments( "print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis")); static cl::opt InlineThreshold( "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt HintThreshold( "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static cl::opt ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining cold callsites")); // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt ColdThreshold( "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::ZeroOrMore, cl::desc("Threshold for hot callsites ")); static cl::opt LocallyHotCallSiteThreshold( "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore, cl::desc("Threshold for locally hot callsites ")); static cl::opt ColdCallSiteRelFreq( "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information.")); static cl::opt HotCallSiteRelFreq( "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore, cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information.")); static cl::opt OptComputeFullInlineCost( "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold.")); static cl::opt InlineCallerSupersetNoBuiltin( "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes.")); static cl::opt DisableGEPConstOperand( "disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands")); namespace { class InlineCostCallAnalyzer; // This struct is used to store information about inline cost of a // particular instruction struct InstructionCostDetail { @@ -2389,63 +2390,66 @@ function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) + return llvm::InlineCost::getNever("unsplited coroutine call"); + auto UserDecision = llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); if (UserDecision.hasValue()) { if (UserDecision->isSuccess()) return llvm::InlineCost::getAlways("always inline attribute"); return llvm::InlineCost::getNever(UserDecision->getFailureReason()); } LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Call.getCaller()->getName() << ")\n"); InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); // Check if there was a reason to force inlining or no inlining. if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold()) return InlineCost::getNever(ShouldInline.getFailureReason()); if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold()) return InlineCost::getAlways("empty function"); return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } InlineResult llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches. if (isa(BI->getTerminator())) return InlineResult::failure("contains indirect branches"); // Disallow inlining of blockaddresses which are used by non-callbr // instructions. if (BI->hasAddressTaken()) for (User *U : BlockAddress::get(&*BI)->users()) if (!isa(*U)) return InlineResult::failure("blockaddress used outside of callbr"); for (auto &II : *BI) { CallBase *Call = dyn_cast(&II); if (!Call) continue; // Disallow recursive calls. if (&F == Call->getCalledFunction()) return InlineResult::failure("recursive call"); // Disallow calls which expose returns-twice to a function not previously // attributed as such. if (!ReturnsTwice && isa(Call) && cast(Call)->canReturnTwice()) return InlineResult::failure("exposes returns-twice attribute"); if (Call->getCalledFunction()) switch (Call->getCalledFunction()->getIntrinsicID()) { default: diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -26,23 +26,10 @@ void initializeCoroElideLegacyPass(PassRegistry &); void initializeCoroCleanupLegacyPass(PassRegistry &); -// CoroEarly pass marks every function that has coro.begin with a string -// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine -// twice. First, it lets it go through complete IPO optimization pipeline as a -// single function. It forces restart of the pipeline by inserting an indirect -// call to an empty function "coro.devirt.trigger" which is devirtualized by -// CoroElide pass that triggers a restart of the pipeline by CGPassManager. -// When CoroSplit pass sees the same coroutine the second time, it splits it up, -// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. - -#define CORO_PRESPLIT_ATTR "coroutine.presplit" -#define UNPREPARED_FOR_SPLIT "0" -#define PREPARED_FOR_SPLIT "1" - #define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger" namespace coro { bool declaresIntrinsics(const Module &M, const std::initializer_list); void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement); diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -22,15 +22,16 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" +#include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; #define DEBUG_TYPE "inline" PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &MAM) { // Add inline assumptions during code generation. @@ -44,42 +45,46 @@ SmallSetVector Calls; bool Changed = false; SmallVector InlinedFunctions; - for (Function &F : M) + for (Function &F : M) { + if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) + continue; + if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && isInlineViable(F).isSuccess()) { Calls.clear(); for (User *U : F.users()) if (auto *CB = dyn_cast(U)) if (CB->getCalledFunction() == &F) Calls.insert(CB); for (CallBase *CB : Calls) // FIXME: We really shouldn't be able to fail to inline at this point! // We should do something to log or check the inline failures here. Changed |= InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime) .isSuccess(); // Remember to try and delete this function afterward. This both avoids // re-walking the rest of the module and avoids dealing with any iterator // invalidation issues while deleting functions. InlinedFunctions.push_back(&F); } + } // Remove any live functions. erase_if(InlinedFunctions, [&](Function *F) { F->removeDeadConstantUsers(); return !F->isDefTriviallyDead(); }); // Delete the non-comdat ones from the module and also from our vector. auto NonComdatBegin = partition( InlinedFunctions, [&](Function *F) { return F->hasComdat(); }); for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end())) M.getFunctionList().erase(F); InlinedFunctions.erase(NonComdatBegin, InlinedFunctions.end()); if (!InlinedFunctions.empty()) { // Now we just have the comdat functions. Filter out the ones whose comdats // are not actually dead. @@ -158,16 +163,19 @@ if (!Callee) return InlineCost::getNever("indirect call"); + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) + return InlineCost::getNever("unsplited coroutine call"); + // FIXME: We shouldn't even get here for declarations. if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); if (!CB.hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getNever("no alwaysinline attribute"); auto IsViable = isInlineViable(*Callee); if (!IsViable.isSuccess()) return InlineCost::getNever(IsViable.getFailureReason()); return InlineCost::getAlways("always inliner"); } diff --git a/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof b/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof @@ -0,0 +1,5 @@ +ff:152730084:141806 + 1: 123 + +foo:152730084:141806 + 65492: ff:302659 \ No newline at end of file diff --git a/llvm/test/Transforms/Coroutines/coro-inline.ll b/llvm/test/Transforms/Coroutines/coro-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-inline.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -always-inline -barrier -coro-early -barrier -coro-split -S | FileCheck %s +; RUN: opt < %s -enable-new-pm -always-inline -coro-early -coro-split -S | FileCheck %s +; RUN: opt < %s -sample-profile-file=%S/Inputs/sample.text.prof -pgo-kind=pgo-sample-use-pipeline -coro-early -barrier -sample-profile -barrier -coro-split -disable-inlining=true -S | FileCheck %s +; RUN: opt < %s -enable-new-pm -sample-profile-file=%S/Inputs/sample.text.prof -pgo-kind=pgo-sample-use-pipeline -coro-early -sample-profile -coro-split -disable-inlining=true -S | FileCheck %s + +; Function Attrs: alwaysinline ssp uwtable +define void @ff() #0 !dbg !12 { +entry: + %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %begin = call i8* @llvm.coro.begin(token %id, i8* null) + ret void +} + +; CHECK: call void @ff() +; Function Attrs: alwaysinline ssp uwtable +define void @foo() #0 !dbg !8 { +entry: + %id1 = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %begin = call i8* @llvm.coro.begin(token %id1, i8* null) + call void @ff(), !dbg !11 + ret void +} + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i8* @llvm.coro.begin(token, i8* writeonly) + +attributes #0 = { alwaysinline ssp uwtable "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "inline_O2.cpp", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 0, scope: !8) +!12 = distinct !DISubprogram(name: "ff", linkageName: "ff", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, unit: !0, retainedNodes: !2)