diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -623,6 +623,16 @@
     for (auto *PM : S.getParamMoves()) {
       EmitStmt(PM);
       ParamReplacer.addCopy(cast<DeclStmt>(PM));
+      // Mark the storage of this parameter copy with llvm.coro.mark.param.
+      // CoroSplit erases the marker calls again.
+      auto *Alloca = cast<llvm::AllocaInst>(
+          GetAddrOfLocalVar(cast<VarDecl>(cast<DeclStmt>(PM)->getSingleDecl()))
+              .getPointer());
+      // Cast via the IRBuilder so the bitcast is inserted at the current
+      // insertion point, right before the call that consumes it.
+      llvm::Value *Cast = Builder.CreateBitCast(Alloca, VoidPtrTy);
+      Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::coro_mark_param),
+                         {Cast});
       // TODO: if(CoroParam(...)) need to surround ctor and dtor
       // for the copy, so that llvm can elide it if the copy is
       // not needed.
diff --git a/clang/test/CodeGenCoroutines/Inputs/coroutine.h b/clang/test/CodeGenCoroutines/Inputs/coroutine.h
--- a/clang/test/CodeGenCoroutines/Inputs/coroutine.h
+++ b/clang/test/CodeGenCoroutines/Inputs/coroutine.h
@@ -67,9 +67,9 @@
 }
 
 struct suspend_always {
-  bool await_ready() { return false; }
-  void await_suspend(coroutine_handle<>) {}
-  void await_resume() {}
+  bool await_ready() noexcept { return false; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
 };
 struct suspend_never {
   bool await_ready() noexcept { return true; }
diff --git a/clang/test/CodeGenCoroutines/coro-param-memcpy.cpp b/clang/test/CodeGenCoroutines/coro-param-memcpy.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-param-memcpy.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++17 -O1 -fno-inline -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+struct task {
+  struct promise_type {
+    task get_return_object() { return {this}; }
+    std::experimental::suspend_always initial_suspend() { return {}; }
+    std::experimental::suspend_always final_suspend() noexcept { return {}; }
+    void return_void() {}
+    void unhandled_exception() {}
+  };
+  promise_type *promise;
+};
+
+namespace std::experimental {
+template <typename... Args>
+struct coroutine_traits<task, Args...> {
+  using promise_type = typename task::promise_type;
+};
+} // namespace std::experimental
+
+void *g = nullptr;
+
+struct A {
+  unsigned long long a = 1;
+  unsigned long long b;
+  unsigned int c;
+};
+
+task foo(A a1) {
+  A a2 = a1; // Necessary.
+  g = &a2;   // So the address isn't optimized out.
+  co_return;
+}
+
+// verify that the entire struct param is in the frame.
+// CHECK: %_Z3foo1A.Frame = type { void (%_Z3foo1A.Frame*)*, void (%_Z3foo1A.Frame*)*, %"struct.task::promise_type", i1, %"struct.std::experimental::coroutines_v1::suspend_always", [5 x i8], [24 x i8] }
+
+// CHECK-LABEL: define dso_local %"struct.task::promise_type"* @_Z3foo1A(
+// CHECK: %[[FRAME:.+]] = call noalias nonnull i8* @_Znwm(
+// CHECK: %[[PTR:.+]] = getelementptr inbounds i8, i8* %[[FRAME]], i64 24
+// CHECK: %[[PARAM:.+]] = bitcast %struct.A* %a1 to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(24) %[[PTR]], i8* noundef nonnull align 8 dereferenceable(24) %[[PARAM]], i64 24, i1 false)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1274,6 +1274,10 @@
                                      ReadOnly<ArgIndex<0>>,
                                      NoCapture<ArgIndex<0>>]>;
 
+// Marker that the frontend places on each coroutine parameter copy; CoroSplit
+// erases it again. It is declared without any attributes.
+def int_coro_mark_param : Intrinsic<[], [llvm_ptr_ty], []>;
+
 ///===-------------------------- Other Intrinsics --------------------------===//
 //
 def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>,
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1766,6 +1766,14 @@
                                   bool ReuseFrameSlot) {
   PrettyStackTraceFunction prettyStackTrace(F);
 
+  // Erase every llvm.coro.mark.param marker call up front.
+  // make_early_inc_range advances the iterator before the loop body runs, so
+  // erasing the current instruction does not invalidate the traversal.
+  for (Instruction &I : make_early_inc_range(instructions(F)))
+    if (auto *II = dyn_cast<IntrinsicInst>(&I))
+      if (II->getIntrinsicID() == Intrinsic::coro_mark_param)
+        II->eraseFromParent();
+
   // The suspend-crossing algorithm in buildCoroutineFrame get tripped
   // up by uses in unreachable blocks, so remove them as a first pass.
   removeUnreachableBlocks(F);