Index: llvm/include/llvm/Transforms/Coroutines.h =================================================================== --- llvm/include/llvm/Transforms/Coroutines.h +++ llvm/include/llvm/Transforms/Coroutines.h @@ -23,7 +23,7 @@ Pass *createCoroEarlyLegacyPass(); /// Split up coroutines into multiple functions driving their state machines. -Pass *createCoroSplitLegacyPass(bool ReuseFrameSlot = false); +Pass *createCoroSplitLegacyPass(bool IsOptimizing = false); /// Analyze coroutines use sites, devirtualize resume/destroy calls and elide /// heap allocation for coroutine frame where possible. Index: llvm/include/llvm/Transforms/Coroutines/CoroSplit.h =================================================================== --- llvm/include/llvm/Transforms/Coroutines/CoroSplit.h +++ llvm/include/llvm/Transforms/Coroutines/CoroSplit.h @@ -22,13 +22,14 @@ namespace llvm { struct CoroSplitPass : PassInfoMixin { - CoroSplitPass(bool ReuseFrameSlot = false) : ReuseFrameSlot(ReuseFrameSlot) {} + CoroSplitPass(bool IsOptimizing = false) : IsOptimizing(IsOptimizing) {} PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); static bool isRequired() { return true; } - bool ReuseFrameSlot; + // Would be true if the Optimization level isn't O0. + bool IsOptimizing; }; } // end namespace llvm Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -41,11 +41,9 @@ // "coro-frame", which results in leaner debug spew. 
#define DEBUG_TYPE "coro-suspend-crossing" -static cl::opt EnableReuseStorageInFrame( - "reuse-storage-in-coroutine-frame", cl::Hidden, - cl::desc( - "Enable the optimization which would reuse the storage in the coroutine \ - frame for allocas whose liferanges are not overlapped, for testing purposes"), +static cl::opt EnableCoroutineOptimization( + "enable-coroutine-optimization", cl::Hidden, + cl::desc("Enable the optimization, for testing purposes only."), llvm::cl::init(false)); enum { SmallVectorThreshold = 32 }; @@ -526,7 +524,7 @@ } }); - if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) { + if (!Shape.IsOptimizing && !EnableCoroutineOptimization) { for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); @@ -1217,7 +1215,7 @@ &*Builder.GetInsertPoint()); // This dbg.declare is for the main function entry point. It // will be deleted in all coro-split functions. - coro::salvageDebugInfo(DbgPtrAllocaCache, DDI); + coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.IsOptimizing); } } @@ -2151,7 +2149,7 @@ void coro::salvageDebugInfo( SmallDenseMap &DbgPtrAllocaCache, - DbgDeclareInst *DDI) { + DbgDeclareInst *DDI, bool IsOptimizing) { Function *F = DDI->getFunction(); IRBuilder<> Builder(F->getContext()); auto InsertPt = F->getEntryBlock().getFirstInsertionPt(); @@ -2192,24 +2190,28 @@ // is available throughout the function when producing unoptimized // code. Extending the lifetime this way is correct because the // variable has been declared by a dbg.declare intrinsic. - if (auto Arg = dyn_cast_or_null(Storage)) { - auto &Cached = DbgPtrAllocaCache[Storage]; - if (!Cached) { - Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr, - Arg->getName() + ".debug"); - Builder.CreateStore(Storage, Cached); + // + // When optimizing, skip creating this alloca: optimization passes + // would eliminate it and leave the corresponding dbg.declares invalid. 
+ if (!IsOptimizing && !EnableCoroutineOptimization) + if (auto *Arg = dyn_cast_or_null(Storage)) { + auto &Cached = DbgPtrAllocaCache[Storage]; + if (!Cached) { + Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr, + Arg->getName() + ".debug"); + Builder.CreateStore(Storage, Cached); + } + Storage = Cached; + // FIXME: LLVM lacks nuanced semantics to differentiate between + // memory and direct locations at the IR level. The backend will + // turn a dbg.declare(alloca, ..., DIExpression()) into a memory + // location. Thus, if there are deref and offset operations in the + // expression, we need to add a DW_OP_deref at the *start* of the + // expression to first load the contents of the alloca before + // adjusting it with the expression. + if (Expr && Expr->isComplex()) + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); } - Storage = Cached; - // FIXME: LLVM lacks nuanced semantics to differentiate between - // memory and direct locations at the IR level. The backend will - // turn a dbg.declare(alloca, ..., DIExpression()) into a memory - // location. Thus, if there are deref and offset operations in the - // expression, we need to add a DW_OP_deref at the *start* of the - // expression to first load the contents of the alloca before - // adjusting it with the expression. - if (Expr && Expr->isComplex()) - Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); - } auto &VMContext = DDI->getFunction()->getContext(); DDI->setOperand( 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage))); Index: llvm/lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- llvm/lib/Transforms/Coroutines/CoroInternal.h +++ llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -54,7 +54,7 @@ /// holding a pointer to the coroutine frame. 
void salvageDebugInfo( SmallDenseMap &DbgPtrAllocaCache, - DbgDeclareInst *DDI); + DbgDeclareInst *DDI, bool IsOptimizing); // Keeps data and helper functions for lowering coroutine intrinsics. struct LowererBase { @@ -125,7 +125,7 @@ Instruction *FramePtr; BasicBlock *AllocaSpillBlock; - bool ReuseFrameSlot; + bool IsOptimizing; struct SwitchLoweringStorage { SwitchInst *ResumeSwitch; @@ -269,8 +269,8 @@ void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; Shape() = default; - explicit Shape(Function &F, bool ReuseFrameSlot = false) - : ReuseFrameSlot(ReuseFrameSlot) { + explicit Shape(Function &F, bool IsOptimizing = false) + : IsOptimizing(IsOptimizing) { buildFrom(F); } void buildFrom(Function &F); Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -654,7 +654,7 @@ if (auto *DDI = dyn_cast(&I)) Worklist.push_back(DDI); for (DbgDeclareInst *DDI : Worklist) - coro::salvageDebugInfo(DbgPtrAllocaCache, DDI); + coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.IsOptimizing); // Remove all salvaged dbg.declare intrinsics that became // either unreachable or stale due to the CoroSplit transformation. 
@@ -662,6 +662,14 @@ return BB->hasNPredecessors(0) && BB != &NewF->getEntryBlock(); }; for (DbgDeclareInst *DDI : Worklist) { + auto *Storage = DDI->getAddress(); + if (isa(Storage)) { + auto &Entry = NewF->getEntryBlock(); + auto *InsertPt = Entry.getFirstNonPHI(); + DDI->moveAfter(InsertPt); + continue; + } + if (IsUnreachableBlock(DDI->getParent())) DDI->eraseFromParent(); else if (dyn_cast_or_null(DDI->getAddress())) { @@ -1731,14 +1739,14 @@ static coro::Shape splitCoroutine(Function &F, SmallVectorImpl &Clones, - bool ReuseFrameSlot) { + bool IsOptimizing) { PrettyStackTraceFunction prettyStackTrace(F); // The suspend-crossing algorithm in buildCoroutineFrame get tripped // up by uses in unreachable blocks, so remove them as a first pass. removeUnreachableBlocks(F); - coro::Shape Shape(F, ReuseFrameSlot); + coro::Shape Shape(F, IsOptimizing); if (!Shape.CoroBegin) return Shape; @@ -2078,7 +2086,7 @@ F.removeFnAttr(CORO_PRESPLIT_ATTR); SmallVector Clones; - const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot); + const coro::Shape Shape = splitCoroutine(F, Clones, IsOptimizing); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon || @@ -2112,13 +2120,13 @@ struct CoroSplitLegacy : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - CoroSplitLegacy(bool ReuseFrameSlot = false) - : CallGraphSCCPass(ID), ReuseFrameSlot(ReuseFrameSlot) { + CoroSplitLegacy(bool IsOptimizing = false) + : CallGraphSCCPass(ID), IsOptimizing(IsOptimizing) { initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry()); } bool Run = false; - bool ReuseFrameSlot; + bool IsOptimizing; // A coroutine is identified by the presence of coro.begin intrinsic, if // we don't have any, this pass has nothing to do. 
@@ -2177,7 +2185,7 @@ F->removeFnAttr(CORO_PRESPLIT_ATTR); SmallVector Clones; - const coro::Shape Shape = splitCoroutine(*F, Clones, ReuseFrameSlot); + const coro::Shape Shape = splitCoroutine(*F, Clones, IsOptimizing); updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC); if (Shape.ABI == coro::ABI::Async) { // Restart SCC passes. @@ -2214,6 +2222,6 @@ "Split coroutine into a set of functions driving its state machine", false, false) -Pass *llvm::createCoroSplitLegacyPass(bool ReuseFrameSlot) { - return new CoroSplitLegacy(ReuseFrameSlot); +Pass *llvm::createCoroSplitLegacyPass(bool IsOptimizing) { + return new CoroSplitLegacy(IsOptimizing); } Index: llvm/test/Transforms/Coroutines/coro-debug-O2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-debug-O2.ll @@ -0,0 +1,172 @@ +; RUN: opt < %s -coro-early -coro-split -enable-coroutine-optimization -coro-split -sroa -S | FileCheck %s + +; Checks whether the dbg.declare for `__promise` remains valid under O2. 
+ +; CHECK-LABEL: define internal fastcc void @f.resume({{.*}}) +; CHECK: entry.resume: +; CHECK: call void @llvm.dbg.declare(metadata %f.Frame* %FramePtr, metadata ![[PROMISEVAR_RESUME:[0-9]+]], metadata !DIExpression( +; +; CHECK: ![[PROMISEVAR_RESUME]] = !DILocalVariable(name: "__promise" +%promise_type = type { i32, i32, double } + +define void @f() !dbg !8 { +entry: + %__promise = alloca %promise_type, align 8 + %0 = bitcast %promise_type* %__promise to i8* + %id = call token @llvm.coro.id(i32 16, i8* %0, i8* null, i8* null) + %alloc = call i1 @llvm.coro.alloc(token %id) + br i1 %alloc, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %size = call i64 @llvm.coro.size.i64() + %memory = call i8* @new(i64 %size) + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %phi.entry.alloc = phi i8* [ null, %entry ], [ %memory, %coro.alloc ] + %begin = call i8* @llvm.coro.begin(token %id, i8* %phi.entry.alloc) + call void @llvm.dbg.declare(metadata %promise_type* %__promise, metadata !6, metadata !DIExpression()), !dbg !18 + %i.i = getelementptr inbounds %promise_type, %promise_type* %__promise, i64 0, i32 0 + store i32 1, i32* %i.i, align 8 + %j.i = getelementptr inbounds %promise_type, %promise_type* %__promise, i64 0, i32 1 + store i32 2, i32* %j.i, align 4 + %k.i = getelementptr inbounds %promise_type, %promise_type* %__promise, i64 0, i32 2 + store double 3.000000e+00, double* %k.i, align 8 + %ready = call i1 @await_ready() + br i1 %ready, label %init.ready, label %init.suspend + +init.suspend: ; preds = %coro.init + %save = call token @llvm.coro.save(i8* null) + call void @await_suspend() + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %coro.ret [ + i8 0, label %init.ready + i8 1, label %init.cleanup + ] + +init.cleanup: ; preds = %init.suspend + br label %cleanup + +init.ready: ; preds = %init.suspend, %coro.init + call void @await_resume() + %ready.again = call zeroext i1 @await_ready() 
+ br i1 %ready.again, label %await.ready, label %await.suspend + +await.suspend: ; preds = %init.ready + %save.again = call token @llvm.coro.save(i8* null) + %from.address = call i8* @from_address(i8* %begin) + call void @await_suspend() + %suspend.again = call i8 @llvm.coro.suspend(token %save.again, i1 false) + switch i8 %suspend.again, label %coro.ret [ + i8 0, label %await.ready + i8 1, label %await.cleanup + ] + +await.cleanup: ; preds = %await.suspend + br label %cleanup + +await.ready: ; preds = %await.suspend, %init.ready + call void @await_resume() + call void @return_void() + br label %coro.final + +coro.final: ; preds = %await.ready + call void @final_suspend() + %coro.final.await_ready = call i1 @await_ready() + br i1 %coro.final.await_ready, label %final.ready, label %final.suspend + +final.suspend: ; preds = %coro.final + %final.suspend.coro.save = call token @llvm.coro.save(i8* null) + %final.suspend.from_address = call i8* @from_address(i8* %begin) + call void @await_suspend() + %final.suspend.coro.suspend = call i8 @llvm.coro.suspend(token %final.suspend.coro.save, i1 true) + switch i8 %final.suspend.coro.suspend, label %coro.ret [ + i8 0, label %final.ready + i8 1, label %final.cleanup + ] + +final.cleanup: ; preds = %final.suspend + br label %cleanup + +final.ready: ; preds = %final.suspend, %coro.final + call void @await_resume() + br label %cleanup + +cleanup: ; preds = %final.ready, %final.cleanup, %await.cleanup, %init.cleanup + %cleanup.dest.slot.0 = phi i32 [ 0, %final.ready ], [ 2, %final.cleanup ], [ 2, %await.cleanup ], [ 2, %init.cleanup ] + %free.memory = call i8* @llvm.coro.free(token %id, i8* %begin) + %free = icmp ne i8* %free.memory, null + br i1 %free, label %coro.free, label %after.coro.free + +coro.free: ; preds = %cleanup + call void @delete(i8* %free.memory) + br label %after.coro.free + +after.coro.free: ; preds = %coro.free, %cleanup + switch i32 %cleanup.dest.slot.0, label %unreachable [ + i32 0, label %cleanup.cont + i32 
2, label %coro.ret + ] + +cleanup.cont: ; preds = %after.coro.free + br label %coro.ret + +coro.ret: ; preds = %cleanup.cont, %after.coro.free, %final.suspend, %await.suspend, %init.suspend + %end = call i1 @llvm.coro.end(i8* null, i1 false) + ret void + +unreachable: ; preds = %after.coro.free + unreachable + +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) +declare i1 @llvm.coro.alloc(token) +declare i64 @llvm.coro.size.i64() +declare token @llvm.coro.save(i8*) +declare i8* @llvm.coro.begin(token, i8* writeonly) +declare i8 @llvm.coro.suspend(token, i1) +declare i8* @llvm.coro.free(token, i8* nocapture readonly) +declare i1 @llvm.coro.end(i8*, i1) + +declare i8* @new(i64) +declare void @delete(i8*) +declare i1 @await_ready() +declare void @await_suspend() +declare void @await_resume() +declare void @print(i32) +declare i8* @from_address(i8*) +declare void @return_void() +declare void @final_suspend() + +!llvm.dbg.cu = !{!0} +!llvm.linker.options = !{} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "coro-debug.cpp", directory: ".") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 11.0.0"} +!6 = !DILocalVariable(name: "__promise", scope: !7, file: !1, line: 24, type: !10) +!7 = distinct !DILexicalBlock(scope: !8, file: !1, line: 23, column: 12) +!8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !8, file: !1, line: 23, type: !9, scopeLine: 23, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !2) +!10 = !DIDerivedType(tag: DW_TAG_typedef, 
name: "promise_type", scope: !8, file: !1, line: 15, baseType: !11) +!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "promise_type", scope: !8, file: !1, line: 10, size: 128, flags: DIFlagTypePassByValue | DIFlagNonTrivial, elements: !12, identifier: "_ZTSN4coro12promise_typeE") +!12 = !{!13, !14, !15} +!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !8, file: !1, line: 10, baseType: !16, size: 32) +!14 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !8, file: !1, line: 10, baseType: !16, size: 32, offset: 32) +!15 = !DIDerivedType(tag: DW_TAG_member, name: "k", scope: !8, file: !1, line: 10, baseType: !17, size: 64, offset: 64) +!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!17 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!18 = !DILocation(line: 0, scope: !7) + + + + + + Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll +++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll @@ -1,6 +1,6 @@ ; Check that we can handle spills of array allocas -; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -coro-split -enable-coroutine-optimization -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -enable-coroutine-optimization -S | FileCheck %s %struct.big_structure = type { [500 x i8] } declare void @consume(%struct.big_structure*) Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll +++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll @@ -1,7 +1,7 @@ ; Tests that variables in a Corotuine whose lifetime range is not overlapping each other ; 
re-use the same slot in Coroutine frame. -; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -coro-split -enable-coroutine-optimization -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -enable-coroutine-optimization -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll +++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll @@ -1,7 +1,7 @@ ; Tests that variables of different type in a Corotuine whose lifetime range is not overlapping each other ; re-use the same slot in Coroutine frame. -; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -coro-split -enable-coroutine-optimization -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -enable-coroutine-optimization -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll +++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll @@ -1,7 +1,7 @@ ; Tests that variables of different type with incompatible alignment in a Corotuine whose lifetime ; range is not overlapping each other should not re-use the same slot in Coroutine frame. 
-; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -coro-split -enable-coroutine-optimization -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -enable-coroutine-optimization -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll +++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll @@ -1,7 +1,7 @@ ; Tests that variables of different type with incompatible alignment in a Corotuine whose ; lifetime range is not overlapping each other re-use the same slot in CorotuineFrame. -; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -coro-split -enable-coroutine-optimization -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -enable-coroutine-optimization -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] }