diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/ErrorHandling.h" @@ -46,7 +47,8 @@ AAResults &AA); bool shouldElide(Function *F, DominatorTree &DT) const; void collectPostSplitCoroIds(Function *F); - bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT); + bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT, + OptimizationRemarkEmitter &ORE); bool hasEscapePath(const CoroBeginInst *, const SmallPtrSetImpl<BasicBlock *> &) const; }; @@ -299,7 +301,7 @@ } bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, - DominatorTree &DT) { + DominatorTree &DT, OptimizationRemarkEmitter &ORE) { CoroBegins.clear(); CoroAllocs.clear(); ResumeAddr.clear(); @@ -343,6 +345,24 @@ replaceWithConstant(ResumeAddrConstant, ResumeAddr); bool ShouldElide = shouldElide(CoroId->getFunction(), DT); + if (!ShouldElide) + ORE.emit([&]() { + if (auto FrameSizeAndAlign = + getFrameLayout(cast<Function>(ResumeAddrConstant))) + return OptimizationRemarkMissed(DEBUG_TYPE, "CoroElide", CoroId) + << "'" << ore::NV("callee", CoroId->getCoroutine()->getName()) + << "' not elided in '" + << ore::NV("caller", CoroId->getFunction()->getName()) + << "' (frame_size=" + << ore::NV("frame_size", FrameSizeAndAlign->first) << ", align=" + << ore::NV("align", FrameSizeAndAlign->second.value()) << ")"; + else + return OptimizationRemarkMissed(DEBUG_TYPE, "CoroElide", CoroId) + << "'" << ore::NV("callee", CoroId->getCoroutine()->getName()) + << "' not elided in '" + << ore::NV("caller", CoroId->getFunction()->getName()) + << "' (frame_size=unknown, align=unknown)"; + });
auto *DestroyAddrConstant = Resumers->getAggregateElement( ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex); @@ -363,6 +383,23 @@ << "Elide " << CoroId->getCoroutine()->getName() << " in " << CoroId->getFunction()->getName() << "\n"; #endif + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "CoroElide", CoroId) + << "'" << ore::NV("callee", CoroId->getCoroutine()->getName()) + << "' elided in '" + << ore::NV("caller", CoroId->getFunction()->getName()) + << "' (frame_size=" + << ore::NV("frame_size", FrameSizeAndAlign->first) << ", align=" + << ore::NV("align", FrameSizeAndAlign->second.value()) << ")"; + }); + } else { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "CoroElide", CoroId) + << "'" << ore::NV("callee", CoroId->getCoroutine()->getName()) + << "' not elided in '" + << ore::NV("caller", CoroId->getFunction()->getName()) + << "' (frame_size=unknown, align=unknown)"; + }); } } @@ -387,10 +424,11 @@ AAResults &AA = AM.getResult<AAManager>(F); DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); bool Changed = false; for (auto *CII : L.CoroIds) - Changed |= L.processCoroId(CII, AA, DT); + Changed |= L.processCoroId(CII, AA, DT, ORE); return Changed ?
PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" @@ -2142,11 +2143,19 @@ F.setSplittedCoroutine(); SmallVector<Function *, 4> Clones; + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); const coro::Shape Shape = splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame, MaterializableCallback); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F) + << "Split '" << ore::NV("function", F.getName()) + << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize) + << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")"; + }); + if (!Shape.CoroSuspends.empty()) { // Run the CGSCC pipeline on the original and newly split functions. UR.CWorklist.insert(&C); diff --git a/llvm/test/Transforms/Coroutines/remarks.ll b/llvm/test/Transforms/Coroutines/remarks.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/remarks.ll @@ -0,0 +1,71 @@ +; Test coroutine remarks.
+; RUN: opt < %s --disable-output -S -passes='default<O1>' \ +; RUN: --pass-remarks="coro-split|coro-elide" \ +; RUN: --pass-remarks-missed="coro-split|coro-elide" \ +; RUN: --pass-remarks-with-hotness 2>&1 | FileCheck %s + +; CHECK: Split 'foo' (frame_size=24, align=8) (hotness: 400) +; CHECK: 'foo' not elided in 'bar' (frame_size=24, align=8) (hotness: 100) +; CHECK: 'foo' elided in 'baz' (frame_size=24, align=8) (hotness: 200) + +define ptr @foo() presplitcoroutine !prof !0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin +dyn.alloc: + %size = call i32 @llvm.coro.size.i32() + %alloc = call ptr @malloc(i32 %size) + br label %coro.begin +coro.begin: + %phi = phi ptr [ null, %entry ], [ %alloc, %dyn.alloc ] + %hdl = call ptr @llvm.coro.begin(token %id, ptr %phi) + call void @print(i32 0) + %0 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %0, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i32 1) + br label %cleanup + +cleanup: + %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) + call void @free(ptr %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(ptr %hdl, i1 0) + ret ptr %hdl +} + +define i32 @bar() !prof !1 { +entry: + %hdl = call ptr @foo() + call void @llvm.coro.resume(ptr %hdl) + ret i32 0 +} + +define i32 @baz() !prof !2 { +entry: + %hdl = call ptr @foo() + call void @llvm.coro.destroy(ptr %hdl) + ret i32 0 +} + +declare ptr @llvm.coro.free(token, ptr) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(ptr) +declare void @llvm.coro.destroy(ptr) + +declare token @llvm.coro.id(i32, ptr, ptr, ptr) +declare i1 @llvm.coro.alloc(token) +declare ptr @llvm.coro.begin(token, ptr) +declare i1 @llvm.coro.end(ptr, i1) + +declare noalias ptr @malloc(i32) +declare void @print(i32) +declare void
@free(ptr) + +!0 = !{!"function_entry_count", i64 400} +!1 = !{!"function_entry_count", i64 100} +!2 = !{!"function_entry_count", i64 200}