Index: llvm/include/llvm/Transforms/Coroutines.h =================================================================== --- llvm/include/llvm/Transforms/Coroutines.h +++ llvm/include/llvm/Transforms/Coroutines.h @@ -13,6 +13,18 @@ namespace llvm { +// CoroEarly pass marks every function that has coro.begin with a string +// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine +// twice. First, it lets it go through complete IPO optimization pipeline as a +// single function. It forces restart of the pipeline by inserting an indirect +// call to an empty function "coro.devirt.trigger" which is devirtualized by +// CoroElide pass that triggers a restart of the pipeline by CGPassManager. +// When CoroSplit pass sees the same coroutine the second time, it splits it up, +// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. +#define CORO_PRESPLIT_ATTR "coroutine.presplit" +#define UNPREPARED_FOR_SPLIT "0" +#define PREPARED_FOR_SPLIT "1" + class Pass; class PassManagerBuilder; Index: llvm/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/lib/Analysis/InlineCost.cpp +++ llvm/lib/Analysis/InlineCost.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Coroutines.h" using namespace llvm; @@ -2328,6 +2329,13 @@ if (!Callee) return InlineResult::failure("indirect call"); + // When callee coroutine function is inlined into caller coroutine function + // before coro-split pass, + // coro-early pass cannot handle this quite well. + // So we won't inline the coroutine function if it has not been split yet. + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) + return InlineResult::failure("unsplited coroutine call"); + // Never inline calls with byval arguments that does not have the alloca // address space. 
Since byval arguments can be replaced with a copy to an // alloca, the inlined code would need to be adjusted to handle that the Index: llvm/lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- llvm/lib/Transforms/Coroutines/CoroInternal.h +++ llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -26,19 +26,6 @@ void initializeCoroElideLegacyPass(PassRegistry &); void initializeCoroCleanupLegacyPass(PassRegistry &); -// CoroEarly pass marks every function that has coro.begin with a string -// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine -// twice. First, it lets it go through complete IPO optimization pipeline as a -// single function. It forces restart of the pipeline by inserting an indirect -// call to an empty function "coro.devirt.trigger" which is devirtualized by -// CoroElide pass that triggers a restart of the pipeline by CGPassManager. -// When CoroSplit pass sees the same coroutine the second time, it splits it up, -// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. 
- -#define CORO_PRESPLIT_ATTR "coroutine.presplit" -#define UNPREPARED_FOR_SPLIT "0" -#define PREPARED_FOR_SPLIT "1" - #define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger" namespace coro { Index: llvm/lib/Transforms/IPO/AlwaysInliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" +#include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -44,7 +45,14 @@ SmallSetVector Calls; bool Changed = false; SmallVector InlinedFunctions; - for (Function &F : M) + for (Function &F : M) { + // When callee coroutine function is inlined into caller coroutine function + // before coro-split pass, + // coro-early pass cannot handle this quite well. + // So we won't inline the coroutine function if it has not been split yet. + if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) + continue; + if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && isInlineViable(F).isSuccess()) { Calls.clear(); @@ -66,6 +74,7 @@ // invalidation issues while deleting functions. InlinedFunctions.push_back(&F); } + } // Remove any live functions. erase_if(InlinedFunctions, [&](Function *F) { @@ -158,6 +167,13 @@ if (!Callee) return InlineCost::getNever("indirect call"); + // When callee coroutine function is inlined into caller coroutine function + // before coro-split pass, + // coro-early pass cannot handle this quite well. + // So we won't inline the coroutine function if it has not been split yet. + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) + return InlineCost::getNever("unsplited coroutine call"); + // FIXME: We shouldn't even get here for declarations. 
if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); Index: llvm/test/Transforms/Coroutines/Inputs/sample.text.prof =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/Inputs/sample.text.prof @@ -0,0 +1,5 @@ +ff:152730084:141806 + 1: 123 + +foo:152730084:141806 + 65492: ff:302659 \ No newline at end of file Index: llvm/test/Transforms/Coroutines/coro-inline.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-inline.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -always-inline -barrier -coro-early -barrier -coro-split -S | FileCheck %s +; RUN: opt < %s -enable-new-pm -always-inline -coro-early -coro-split -S | FileCheck %s +; RUN: opt < %s -sample-profile-file=%S/Inputs/sample.text.prof -pgo-kind=pgo-sample-use-pipeline -coro-early -barrier -sample-profile -barrier -coro-split -disable-inlining=true -S | FileCheck %s +; RUN: opt < %s -enable-new-pm -sample-profile-file=%S/Inputs/sample.text.prof -pgo-kind=pgo-sample-use-pipeline -coro-early -sample-profile -coro-split -disable-inlining=true -S | FileCheck %s + +; Function Attrs: alwaysinline ssp uwtable +define void @ff() #0 !dbg !12 { +entry: + %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %begin = call i8* @llvm.coro.begin(token %id, i8* null) + ret void +} + +; CHECK: call void @ff() +; Function Attrs: alwaysinline ssp uwtable +define void @foo() #0 !dbg !8 { +entry: + %id1 = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %begin = call i8* @llvm.coro.begin(token %id1, i8* null) + call void @ff(), !dbg !11 + ret void +} + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i8* @llvm.coro.begin(token, i8* writeonly) + +attributes #0 = { alwaysinline ssp uwtable "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" 
"less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "inline_O2.cpp", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 0, scope: !8) +!12 = distinct !DISubprogram(name: "ff", linkageName: "ff", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, unit: !0, retainedNodes: !2)