Index: llvm/docs/Coroutines.rst =================================================================== --- llvm/docs/Coroutines.rst +++ llvm/docs/Coroutines.rst @@ -1723,6 +1723,82 @@ This pass runs late to lower all coroutine related intrinsics not replaced by earlier passes. +Coroutine Related Attributes +============================ + +always_complete_coroutine +------------------------- + +The ``always_complete_coroutine`` attribute is a function attribute that indicates +that the coroutine will always complete. Currently this is only supported for +switch-resumed coroutines. + +For an ``always_complete_coroutine`` coroutine, the following assumptions are +guaranteed by the frontend: +- When the destroy function gets called, the suspend index must be at the final + suspend position. +- If the coroutine is suspended at a non-final suspend point, it is guaranteed to + be resumed. For example, the split coroutine may look like: + +.. code-block:: llvm + + %coro_index = load i64, ptr %frame.index_addr + switch i64 %coro_index, label %unreachable [i64 0, label %bb0 + i64 1, label %bb1 + i64 2, label %bb2 + ... + i64 n, label bb_n] + + bb0: + ... + %coro_next_index = add i64 %coro_index, 1 + store i64 %coro_next_index, ptr %frame.index_addr + ret + + bb1: + ... + %coro_next_index = add i64 %coro_index, 1 + store i64 %coro_next_index, ptr %frame.index_addr + ret + + ... + +If the coroutine is marked with ``always_complete_coroutine``, then the optimizer +is allowed to treat the ``ret`` in ``bb{i} (i != n)`` as a branch instruction to +the next block ``bb{i+1}`` preceded by a call to an unknown function, just like: + +.. code-block:: llvm + + %coro_index = load i64, ptr %frame.index_addr + switch i64 %coro_index, label %unreachable [i64 0, label %bb0 + i64 1, label %bb1 + i64 2, label %bb2 + ... + i64 n, label bb_n] + + bb0: + ... + %coro_next_index = add i64 %coro_index, 1 + store i64 %coro_next_index, ptr %frame.index_addr + call void @unknown_func(ptr %frame); + br label %bb1 + + bb1: + ...
+ %coro_next_index = add i64 %coro_index, 1 + store i64 %coro_next_index, ptr %frame.index_addr + call void @unknown_func(ptr %frame); + br label %bb2 + + ... + + bb_n: + ... + ret + +So the ``always_complete_coroutine`` attribute can provide new control flow information +that we can't get from traditional analysis. + Areas Requiring Attention ========================= #. When coro.suspend returns -1, the coroutine is suspended, and it's possible Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -1615,6 +1615,11 @@ ``CallSite.Args[EltSizeParam] * CallSite.Args[NumEltsParam]`` bytes are available. The referenced parameters must be integer types. No assumptions are made about the contents of the returned block of memory. +``always_complete_coroutine`` + This attribute indicates that the coroutine will always complete. + The compiler can then perform optimizations based on this assumption. + See :doc:`Coroutines` for details. + If the attribute is applied to a non-coroutine, it has no effect. ``alwaysinline`` This attribute indicates that the inliner should attempt to inline this function into callers whenever possible, ignoring any active Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -690,6 +690,7 @@ ATTR_KIND_PRESPLIT_COROUTINE = 83, ATTR_KIND_FNRETTHUNK_EXTERN = 84, ATTR_KIND_SKIP_PROFILE = 85, + ATTR_KIND_ALWAYS_COMPLETE_COROUTINE = 86, }; enum ComdatSelectionKindCodes { Index: llvm/include/llvm/IR/Attributes.td =================================================================== --- llvm/include/llvm/IR/Attributes.td +++ llvm/include/llvm/IR/Attributes.td @@ -314,6 +314,9 @@ /// Function is a presplit coroutine.
def PresplitCoroutine : EnumAttr<"presplitcoroutine", [FnAttr]>; +/// The coroutine is guaranteed to always complete. +def AlwaysCompleteCoroutine : EnumAttr<"always_complete_coroutine", [FnAttr]>; + /// Target-independent string attributes. def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; Index: llvm/include/llvm/IR/Function.h =================================================================== --- llvm/include/llvm/IR/Function.h +++ llvm/include/llvm/IR/Function.h @@ -491,6 +491,14 @@ void setPresplitCoroutine() { addFnAttr(Attribute::PresplitCoroutine); } void setSplittedCoroutine() { removeFnAttr(Attribute::PresplitCoroutine); } + /// Determine if the function is marked as an always-complete coroutine. + bool isAlwaysCompleteCoroutine() const { + return hasFnAttribute(Attribute::AlwaysCompleteCoroutine); + } + void setAlwaysPresplitCoroutine() { + addFnAttr(Attribute::AlwaysCompleteCoroutine); + } + /// Determine if the function does not access memory.
bool doesNotAccessMemory() const { return hasFnAttribute(Attribute::ReadNone); Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2015,6 +2015,8 @@ return Attribute::Hot; case bitc::ATTR_KIND_PRESPLIT_COROUTINE: return Attribute::PresplitCoroutine; + case bitc::ATTR_KIND_ALWAYS_COMPLETE_COROUTINE: + return Attribute::AlwaysCompleteCoroutine; } } Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -782,6 +782,8 @@ return bitc::ATTR_KIND_MUSTPROGRESS; case Attribute::PresplitCoroutine: return bitc::ATTR_KIND_PRESPLIT_COROUTINE; + case Attribute::AlwaysCompleteCoroutine: + return bitc::ATTR_KIND_ALWAYS_COMPLETE_COROUTINE; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -473,12 +473,23 @@ assert(Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.HasFinalSuspend); - if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd) - return; - auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]); auto FinalCaseIt = std::prev(Switch->case_end()); BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); + + if (isSwitchDestroyFunction() && NewF->isAlwaysCompleteCoroutine()) { + // If the coroutine is an always-complete coroutine, we know the destroy + // function will only be called when the coroutine is done, so we can + // eliminate the other branches.
+ Builder.SetInsertPoint(Switch); + Builder.CreateBr(ResumeBB); + Switch->eraseFromParent(); + return; + } + + if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd) + return; + Switch->removeCase(FinalCaseIt); if (isSwitchDestroyFunction()) { BasicBlock *OldSwitchBB = Switch->getParent(); Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -922,6 +922,7 @@ case Attribute::WriteOnly: case Attribute::AllocKind: case Attribute::PresplitCoroutine: + case Attribute::AlwaysCompleteCoroutine: continue; // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: Index: llvm/test/Transforms/Coroutines/coro-always_complete_coroutine.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-always_complete_coroutine.ll @@ -0,0 +1,139 @@ +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse,sccp,simplifycfg,early-cse' -S | FileCheck %s + +%"struct.std::coroutine_traits::promise_type" = type { i8 } +%"struct.std::coroutine_handle" = type { i8 } +%struct.Cleanup = type { i8 } + +; Function Attrs: presplitcoroutine always_complete_coroutine uwtable +define dso_local void @_Z1fv() #0 personality ptr @__gxx_personality_v0 { +entry: + %__promise = alloca %"struct.std::coroutine_traits::promise_type", align 1 + %agg.tmp = alloca %"struct.std::coroutine_handle", align 1 + %cleanup4 = alloca %struct.Cleanup, align 1 + %agg.tmp7 = alloca %"struct.std::coroutine_handle", align 1 + %agg.tmp19 = alloca %"struct.std::coroutine_handle", align 1 + %0 = bitcast ptr %__promise to ptr + %1 = call token @llvm.coro.id(i32 16, ptr %0, ptr @_Z1fv, ptr null) + %2 = call i1 @llvm.coro.alloc(token %1) + br i1 %2, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %3 = call i64 
@llvm.coro.size.i64() + %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef %3) #8 + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %4 = phi ptr [ null, %entry ], [ %call, %coro.alloc ] + %5 = call ptr @llvm.coro.begin(token %1, ptr %4) #9 + call void @llvm.lifetime.start.p0(i64 1, ptr %__promise) #2 + %6 = call token @llvm.coro.save(ptr null) + call void @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr noundef %5) #2 + call void @_ZNSt16coroutine_handleIvEC1INSt16coroutine_traitsIJvEE12promise_typeEEES_IT_E(ptr noundef nonnull align 1 dereferenceable(1) %agg.tmp) #2 + %7 = call i8 @llvm.coro.suspend(token %6, i1 false) + switch i8 %7, label %coro.ret [ + i8 0, label %init.ready + i8 1, label %cleanup + ] + +init.ready: ; preds = %coro.init + br label %cleanup + +cleanup: ; preds = %init.ready, %coro.init + %cleanup.dest.slot.0 = phi i32 [ 0, %init.ready ], [ 2, %coro.init ] + %cond = icmp eq i32 %cleanup.dest.slot.0, 0 + br i1 %cond, label %cleanup.cont, label %cleanup25 + +cleanup.cont: ; preds = %cleanup + call void @llvm.lifetime.start.p0(i64 1, ptr %cleanup4) #2 + %8 = call token @llvm.coro.save(ptr null) + call void @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr noundef %5) #2 + call void @_ZNSt16coroutine_handleIvEC1INSt16coroutine_traitsIJvEE12promise_typeEEES_IT_E(ptr noundef nonnull align 1 dereferenceable(1) %agg.tmp7) #2 + %9 = call i8 @llvm.coro.suspend(token %8, i1 false) + switch i8 %9, label %coro.ret [ + i8 0, label %await.ready + i8 1, label %cleanup10 + ] + +await.ready: ; preds = %cleanup.cont + br label %cleanup10 + +cleanup10: ; preds = %await.ready, %cleanup.cont + %cleanup.dest.slot.1 = phi i32 [ 0, %await.ready ], [ 2, %cleanup.cont ] + %cond1 = icmp eq i32 %cleanup.dest.slot.1, 0 + %spec.select = select i1 %cond1, i32 3, i32 %cleanup.dest.slot.1 + call void @_ZN7CleanupD1Ev(ptr noundef nonnull align 1 dereferenceable(1) %cleanup4) 
#2 + call void @llvm.lifetime.end.p0(i64 1, ptr %cleanup4) #2 + %cond2 = icmp eq i32 %spec.select, 3 + br i1 %cond2, label %coro.final, label %cleanup25 + +coro.final: ; preds = %cleanup10 + %10 = call token @llvm.coro.save(ptr null) + call void @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr noundef %5) #2 + call void @_ZNSt16coroutine_handleIvEC1INSt16coroutine_traitsIJvEE12promise_typeEEES_IT_E(ptr noundef nonnull align 1 dereferenceable(1) %agg.tmp19) #2 + %11 = call i8 @llvm.coro.suspend(token %10, i1 true) #9 + switch i8 %11, label %coro.ret [ + i8 0, label %cleanup22 + i8 1, label %cleanup22 + ] + +cleanup22: ; preds = %coro.final, %coro.final + br label %cleanup25 + +cleanup25: ; preds = %cleanup22, %cleanup10, %cleanup + call void @llvm.lifetime.end.p0(i64 1, ptr %__promise) #2 + %12 = call ptr @llvm.coro.free(token %1, ptr %5) + %13 = icmp ne ptr %12, null + br i1 %13, label %coro.free, label %coro.ret + +coro.free: ; preds = %cleanup25 + call void @_ZdlPv(ptr noundef %12) #2 + br label %coro.ret + +coro.ret: ; preds = %coro.free, %cleanup25, %coro.final, %cleanup.cont, %coro.init + %14 = call i1 @llvm.coro.end(ptr null, i1 false) #9 + ret void +} + +; CHECK: define{{.*}}void @_Z1fv.destroy +; CHECK-NEXT: entry.destroy: +; CHECK-NEXT: call void @_ZdlPv +; CHECK-NEXT: ret + +; CHECK: define{{.*}}void @_Z1fv.cleanup +; CHECK-NEXT: entry.cleanup: +; CHECK-NEXT: ret + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare dso_local noundef nonnull ptr @_Znwm(i64 noundef) #3 +declare i64 @llvm.coro.size.i64() #4 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5 +declare dso_local i32 @__gxx_personality_v0(...) 
+declare token @llvm.coro.save(ptr) #6 +declare dso_local void @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr noundef) #2 +declare dso_local void @_ZNSt16coroutine_handleIvEC1INSt16coroutine_traitsIJvEE12promise_typeEEES_IT_E(ptr noundef nonnull align 1 dereferenceable(1)) unnamed_addr #2 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5 +declare dso_local void @_ZN7CleanupD1Ev(ptr noundef nonnull align 1 dereferenceable(1)) unnamed_addr #2 +declare dso_local void @_ZdlPv(ptr noundef) #7 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1) #2 + +attributes #0 = { presplitcoroutine always_complete_coroutine uwtable } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nobuiltin allocsize(0) } +attributes #4 = { nounwind readnone } +attributes #5 = { argmemonly nocallback nofree nosync nounwind willreturn } +attributes #6 = { nomerge nounwind } +attributes #7 = { nobuiltin nounwind } +attributes #8 = { allocsize(0) } +attributes #9 = { noduplicate } + +!llvm.linker.options = !{} +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 2}