diff --git a/llvm/include/llvm/Transforms/Coroutines.h b/llvm/include/llvm/Transforms/Coroutines.h --- a/llvm/include/llvm/Transforms/Coroutines.h +++ b/llvm/include/llvm/Transforms/Coroutines.h @@ -14,6 +14,18 @@ namespace llvm { +// CoroEarly pass marks every function that has coro.begin with a string +// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine +// twice. First, it lets it go through complete IPO optimization pipeline as a +// single function. It forces restart of the pipeline by inserting an indirect +// call to an empty function "coro.devirt.trigger" which is devirtualized by +// CoroElide pass that triggers a restart of the pipeline by CGPassManager. +// When CoroSplit pass sees the same coroutine the second time, it splits it up, +// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. +#define CORO_PRESPLIT_ATTR "coroutine.presplit" +#define UNPREPARED_FOR_SPLIT "0" +#define PREPARED_FOR_SPLIT "1" + class Pass; class PassManagerBuilder; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Coroutines.h" using namespace llvm; @@ -2006,6 +2007,9 @@ if (!Callee) return llvm::InlineCost::getNever("indirect call"); + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) + return llvm::InlineCost::getNever("unsplited coroutine call"); + // Never inline calls with byval arguments that does not have the alloca // address space. Since byval arguments can be replaced with a copy to an // alloca, the inlined code would need to be adjusted to handle that the diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -27,19 +27,6 @@ void initializeCoroElidePass(PassRegistry &); void initializeCoroCleanupPass(PassRegistry &); -// CoroEarly pass marks every function that has coro.begin with a string -// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine -// twice. First, it lets it go through complete IPO optimization pipeline as a -// single function. It forces restart of the pipeline by inserting an indirect -// call to an empty function "coro.devirt.trigger" which is devirtualized by -// CoroElide pass that triggers a restart of the pipeline by CGPassManager. -// When CoroSplit pass sees the same coroutine the second time, it splits it up, -// adds coroutine subfunctions to the SCC to be processed by IPO pipeline. - -#define CORO_PRESPLIT_ATTR "coroutine.presplit" -#define UNPREPARED_FOR_SPLIT "0" -#define PREPARED_FOR_SPLIT "1" - #define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger" namespace coro { diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -27,6 +27,7 @@ #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Coroutines.h" using namespace llvm; @@ -37,7 +38,10 @@ SmallSetVector Calls; bool Changed = false; SmallVector InlinedFunctions; - for (Function &F : M) + for (Function &F : M) { + if (F.hasFnAttribute(CORO_PRESPLIT_ATTR)) + continue; + if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && isInlineViable(F)) { Calls.clear(); @@ -58,6 +62,7 @@ // invalidation issues while deleting functions. InlinedFunctions.push_back(&F); } + } // Remove any live functions. erase_if(InlinedFunctions, [&](Function *F) { @@ -145,10 +150,16 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) { Function *Callee = CS.getCalledFunction(); + if (!Callee) + return InlineCost::getNever("always inliner"); + + if (Callee->hasFnAttribute(CORO_PRESPLIT_ATTR)) + return InlineCost::getNever("unsplited coroutine call"); + // Only inline direct calls to functions with always-inline attributes // that are viable for inlining. FIXME: We shouldn't even get here for // declarations. - if (Callee && !Callee->isDeclaration() && + if (!Callee->isDeclaration() && CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) return InlineCost::getAlways("always inliner"); diff --git a/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof b/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/Inputs/sample.text.prof @@ -0,0 +1,5 @@ +ff:152730084:141806 + 1: 123 + +foo:152730084:141806 + 65492: ff:302659 \ No newline at end of file diff --git a/llvm/test/Transforms/Coroutines/coro-inline.ll b/llvm/test/Transforms/Coroutines/coro-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-inline.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -always-inline -barrier -coro-early -barrier -coro-split -S | FileCheck %s +; RUN: opt < %s -sample-profile-file=%S/Inputs/sample.text.prof -coro-early -barrier -sample-profile -barrier -coro-split -disable-inlining=true -S | FileCheck %s + +; Function Attrs: alwaysinline ssp uwtable +define void @ff() #0 !dbg !12 { +entry: + %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %begin = call i8* @llvm.coro.begin(token %id, i8* null) + ret void +} + +; CHECK: call void @ff() +; Function Attrs: alwaysinline ssp uwtable +define void @foo() #0 !dbg !8 { +entry: + %id1 = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %begin = call i8* @llvm.coro.begin(token %id1, i8* null) + call void @ff(), !dbg !11 + ret void +} + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i8* @llvm.coro.begin(token, i8* writeonly) + +attributes #0 = { alwaysinline ssp uwtable "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "inline_O2.cpp", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 0, scope: !8) +!12 = distinct !DISubprogram(name: "ff", linkageName: "ff", scope: !1, file: !1, line: 46, type: !9, scopeLine: 46, flags: DIFlagPrototyped, unit: !0, retainedNodes: !2)