# Patch overview (review notes; everything above the first "diff --git" header
# is commentary and is not part of any hunk).
#
# Purpose, as far as the hunks below show: keep functions that carry the
# "coroutine.presplit" string attribute from being inlined.
#
# Files touched:
#   llvm/include/llvm/Transforms/Coroutines.h
#     Gains the explanatory comment and the CORO_PRESPLIT_ATTR,
#     UNPREPARED_FOR_SPLIT and PREPARED_FOR_SPLIT macros.
#   llvm/lib/Transforms/Coroutines/CoroInternal.h
#     Loses that same comment and the same three macros, i.e. they move to the
#     public header. CORO_DEVIRT_TRIGGER_FN stays in this file.
#   llvm/lib/Analysis/InlineCost.cpp
#     Includes llvm/Transforms/Coroutines.h and adds an early
#     InlineCost::getNever("unsplited coroutine call") return when Callee has
#     CORO_PRESPLIT_ATTR. The name of the enclosing function is outside the
#     hunk context.
#   llvm/lib/Transforms/IPO/AlwaysInliner.cpp
#     Includes the same header. The `for (Function &F : M)` loop now skips any
#     function that has CORO_PRESPLIT_ATTR, and a later hunk adds the same
#     getNever return right after the existing "indirect call" check.
#   llvm/test/Transforms/Coroutines/Inputs/sample.text.prof (new file)
#     Profile passed via -profile-file by the O2 test's RUN lines. The diff
#     records it as having no newline at end of file.
#   llvm/test/Transforms/Coroutines/coro-inline-O0.ll (new file)
#   llvm/test/Transforms/Coroutines/coro-inline-O2.ll (new file)
#     Both define @ff and @foo with attribute group #0, which contains
#     alwaysinline and "coroutine.presplit"="1", and CHECK that
#     "call void @ff()" is still present in the output.
#
# NOTE(review): this copy of the patch looks like it went through a lossy text
# extraction: line breaks inside hunks are collapsed into spaces, and
# angle-bracketed text seems to be missing (for example "function_ref GetBFI"
# and "SmallSetVector Calls" carry no template arguments). It presumably will
# not apply as-is; regenerate it from the original revision before use.
# NOTE(review): "unsplited" in both getNever() messages is probably meant to be
# "unsplit"; confirm nothing matches on that string before changing it.
# NOTE(review): the added single-statement ifs use braces, and the new include
# in AlwaysInliner.cpp is appended after Utils/ModuleUtils.h; check both
# against the LLVM Coding Standards.
diff --git a/llvm/include/llvm/Transforms/Coroutines.h b/llvm/include/llvm/Transforms/Coroutines.h --- a/llvm/include/llvm/Transforms/Coroutines.h +++ b/llvm/include/llvm/Transforms/Coroutines.h @@ -13,6 +13,18 @@ namespace llvm { +// CoroEarly pass marks every function that has coro.begin with a string +// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine +// twice. First, it lets it go through complete IPO optimization pipeline as a +// single function. It forces restart of the pipeline by inserting an indirect +// call to an empty function "coro.devirt.trigger" which is devirtualized by +// CoroElide pass that triggers a restart of the pipeline by CGPassManager. +// When CoroSplit pass sees the same coroutine the second time, it splits it up, +// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. +#define CORO_PRESPLIT_ATTR "coroutine.presplit" +#define UNPREPARED_FOR_SPLIT "0" +#define PREPARED_FOR_SPLIT "1" + class Pass; class PassManagerBuilder; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Coroutines.h" using namespace llvm; @@ -2389,6 +2390,10 @@ function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) { + return llvm::InlineCost::getNever("unsplited coroutine call"); + } + auto UserDecision = llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -26,19 +26,6 @@ void initializeCoroElideLegacyPass(PassRegistry &); void 
initializeCoroCleanupLegacyPass(PassRegistry &); -// CoroEarly pass marks every function that has coro.begin with a string -// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine -// twice. First, it lets it go through complete IPO optimization pipeline as a -// single function. It forces restart of the pipeline by inserting an indirect -// call to an empty function "coro.devirt.trigger" which is devirtualized by -// CoroElide pass that triggers a restart of the pipeline by CGPassManager. -// When CoroSplit pass sees the same coroutine the second time, it splits it up, -// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. - -#define CORO_PRESPLIT_ATTR "coroutine.presplit" -#define UNPREPARED_FOR_SPLIT "0" -#define PREPARED_FOR_SPLIT "1" - #define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger" namespace coro { diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -26,6 +26,7 @@ #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Coroutines.h" using namespace llvm; @@ -44,7 +45,10 @@ SmallSetVector Calls; bool Changed = false; SmallVector InlinedFunctions; - for (Function &F : M) + for (Function &F : M) { + if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) { + continue; + } if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && isInlineViable(F).isSuccess()) { Calls.clear(); @@ -66,6 +70,7 @@ // invalidation issues while deleting functions. InlinedFunctions.push_back(&F); } + } // Remove any live functions. 
erase_if(InlinedFunctions, [&](Function *F) { @@ -158,6 +163,10 @@ if (!Callee) return InlineCost::getNever("indirect call"); + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) { + return InlineCost::getNever("unsplited coroutine call"); + } + // FIXME: We shouldn't even get here for declarations. if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); diff --git a/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof b/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof @@ -0,0 +1,5 @@ +ff:152730084:141806 + 1: 123 + +foo:152730084:141806 + 65492: ff:302659 \ No newline at end of file diff --git a/llvm/test/Transforms/Coroutines/coro-inline-O0.ll b/llvm/test/Transforms/Coroutines/coro-inline-O0.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-inline-O0.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -always-inline -barrier -coro-split -coro-early -S | FileCheck %s +; RUN: opt < %s -enable-new-pm -always-inline -coro-split -coro-early -S | FileCheck %s + +; Function Attrs: alwaysinline ssp uwtable +define void @ff() #0 { +entry: + %__promise = alloca i8, align 8 + %id = call token @llvm.coro.id(i32 16, i8* %__promise, i8* null, i8* null) + %alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %alloc, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %size = call i64 @llvm.coro.size.i64() + %memory = call i8* @new(i64 %size) + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %phi.entry.alloc = phi i8* [ null, %entry ], [ %memory, %coro.alloc ] + %begin = call i8* @llvm.coro.begin(token %id, i8* %phi.entry.alloc) + %ready = call i1 @await_ready() + br label %coro.ret + +coro.ret: ; preds = %cleanup.cont, %after.coro.free, %final.suspend, %await.suspend, %init.suspend + %end = call i1 @llvm.coro.end(i8* null, i1 false) + ret void + +unreachable: ; preds = %after.coro.free + unreachable +} + +; 
CHECK: call void @ff() +; Function Attrs: alwaysinline ssp uwtable +define void @foo() #0 { +entry: + %__promise = alloca i8, align 8 + %id1 = call token @llvm.coro.id(i32 16, i8* %__promise, i8* null, i8* null) + %alloc = call i1 @llvm.coro.alloc(token %id1) + br i1 %alloc, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %size = call i64 @llvm.coro.size.i64() + %memory = call i8* @new(i64 %size) + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %phi.entry.alloc = phi i8* [ null, %entry ], [ %memory, %coro.alloc ] + %begin = call i8* @llvm.coro.begin(token %id1, i8* %phi.entry.alloc) + %ready = call i1 @await_ready() + br label %coro.clean.up + +coro.clean.up: + call void @ff() + br label %coro.ret + +coro.ret: ; preds = %cleanup.cont, %after.coro.free, %final.suspend, %await.suspend, %init.suspend + %end = call i1 @llvm.coro.end(i8* null, i1 false) + ret void + +unreachable: ; preds = %after.coro.free + unreachable +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i1 @llvm.coro.alloc(token) +declare i64 @llvm.coro.size.i64() +declare token @llvm.coro.save(i8*) +declare i8* @llvm.coro.begin(token, i8* writeonly) +declare i8 @llvm.coro.suspend(token, i1) +declare i8* @llvm.coro.free(token, i8* nocapture readonly) +declare i1 @llvm.coro.end(i8*, i1) + +declare i8* @new(i64) +declare void @delete(i8*) +declare i1 @await_ready() +declare void @await_suspend() +declare void @await_resume() +declare void @print(i32) +declare i8* @from_address(i8*) +declare void @return_void() +declare void @final_suspend() +declare void @__cxa_end_catch() + +attributes #0 = { alwaysinline ssp uwtable "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/Coroutines/coro-inline-O2.ll b/llvm/test/Transforms/Coroutines/coro-inline-O2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-inline-O2.ll @@ -0,0 +1,140 @@ +; RUN: opt < %s -O2 -profile-file=%S/Inputs/sample.text.prof -coro-split -coro-early -pgo-kind=pgo-sample-use-pipeline -disable-inlining=true -S | FileCheck %s +; RUN: opt < %s -enable-new-pm -O2 -profile-file=%S/Inputs/sample.text.prof -coro-early -pgo-kind=pgo-sample-use-pipeline -disable-inlining=true -S | FileCheck %s + +; Function Attrs: alwaysinline ssp uwtable +define void @ff() #0 !dbg !12 { +entry: + %__promise = alloca i8, align 8 + %id = call token @llvm.coro.id(i32 16, i8* %__promise, i8* null, i8* null) + %alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %alloc, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %size = call i64 @llvm.coro.size.i64() + %memory = call i8* @new(i64 %size) + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %phi.entry.alloc = phi i8* [ null, %entry ], [ %memory, %coro.alloc ] + %begin = call i8* @llvm.coro.begin(token %id, i8* %phi.entry.alloc) + %ready = call i1 @await_ready() + br label %coro.ret + +coro.ret: ; preds = %cleanup.cont, %after.coro.free, %final.suspend, %await.suspend, %init.suspend + %end = call i1 @llvm.coro.end(i8* null, i1 false) + ret void + +unreachable: ; preds = %after.coro.free + unreachable +} + +; CHECK: call void @ff() +; Function Attrs: alwaysinline ssp uwtable +define void @foo() #0 !dbg !8 { +entry: + %__promise = alloca i8, align 8 + %id1 = call token @llvm.coro.id(i32 16, i8* %__promise, i8* null, i8* null) + %alloc = call i1 
@llvm.coro.alloc(token %id1) + br i1 %alloc, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %size = call i64 @llvm.coro.size.i64() + %memory = call i8* @new(i64 %size) + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %phi.entry.alloc = phi i8* [ null, %entry ], [ %memory, %coro.alloc ] + %begin = call i8* @llvm.coro.begin(token %id1, i8* %phi.entry.alloc) + %ready = call i1 @await_ready() + br i1 %ready, label %init.ready, label %init.suspend + +init.suspend: ; preds = %coro.init + %save = call token @llvm.coro.save(i8* null) + call void @await_suspend() + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %coro.ret [ + i8 0, label %init.ready + i8 1, label %init.cleanup + ] + +init.cleanup: ; preds = %init.suspend + br label %cleanup + +init.ready: ; preds = %init.suspend, %coro.init + call void @await_resume() + %ready.again = call zeroext i1 @await_ready() + br i1 %ready.again, label %await.ready, label %await.suspend + +await.suspend: ; preds = %init.ready + %save.again = call token @llvm.coro.save(i8* null) + %from.address = call i8* @from_address(i8* %begin) + call void @await_suspend() + %suspend.again = call i8 @llvm.coro.suspend(token %save.again, i1 false) + switch i8 %suspend.again, label %coro.ret [ + i8 0, label %await.ready + i8 1, label %await.cleanup + ] + +await.cleanup: ; preds = %await.suspend + br label %cleanup + +await.ready: ; preds = %await.suspend, %init.ready + call void @await_resume() + call void @return_void() + br label %coro.final + +coro.final: ; preds = %await.ready + call void @final_suspend() + %coro.final.await_ready = call i1 @await_ready() + br label %cleanup + +cleanup: ; preds = %final.ready, %final.cleanup, %await.cleanup, %init.cleanup + call void @ff(), !dbg !11 + br label %coro.ret + +coro.ret: ; preds = %cleanup.cont, %after.coro.free, %final.suspend, %await.suspend, %init.suspend + %end = call i1 @llvm.coro.end(i8* null, i1 false) + ret void 
+ +unreachable: ; preds = %after.coro.free + unreachable +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i1 @llvm.coro.alloc(token) +declare i64 @llvm.coro.size.i64() +declare token @llvm.coro.save(i8*) +declare i8* @llvm.coro.begin(token, i8* writeonly) +declare i8 @llvm.coro.suspend(token, i1) +declare i8* @llvm.coro.free(token, i8* nocapture readonly) +declare i1 @llvm.coro.end(i8*, i1) + +declare i8* @new(i64) +declare void @delete(i8*) +declare i1 @await_ready() +declare void @await_suspend() +declare void @await_resume() +declare void @print(i32) +declare i8* @from_address(i8*) +declare void @return_void() +declare void @final_suspend() +declare void @__cxa_end_catch() + +attributes #0 = { alwaysinline ssp uwtable "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None, sysroot: "/") +!1 = !DIFile(filename: "inline_O2.cpp", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, 
spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 0, scope: !8) +!12 = distinct !DISubprogram(name: "ff", linkageName: "ff", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)