Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1541,6 +1541,56 @@
     rewritePHIs(*BB);
 }
 
+/// Copies each byval argument of \p F into an alloca created in the entry
+/// block and redirects every use of the argument to that copy, so that the
+/// pointee itself, not just the incoming pointer, can be put in the frame.
+///
+/// For a C++ call `coro_func(a)` that passes an `A a` by value, a programmer
+/// expects `coro_func` to have its own storage for the A, in its frame if
+/// necessary. However, the IR generated in the caller `foo` would be:
+/// ```
+/// void foo() {
+///   %a = Alloca A
+///   // Some work with %a
+///   %a.byval-temp = Alloca A
+///   memcpy(%a.byval-temp, %a)
+///   coro_func(%a.byval-temp);
+///   // Other work with %a
+/// }
+/// ```
+/// The type of the argument in `coro_func` would be a pointer instead of `A`,
+/// and it is that pointer which would be put in the frame. It may cause many
+/// problems.
+///
+/// FIXME: It would cause an extra copy. Try to eliminate it.
+static void rewriteByValArguments(Function &F) {
+  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  for (Argument &Arg : F.args()) {
+    if (!Arg.hasByValAttr())
+      continue;
+
+    Type *ByValType = Arg.getParamByValType();
+    // Allocate in the argument's own address space so that the copy has the
+    // same pointer type as the argument it replaces.
+    AllocaInst *ByValAlloca = Builder.CreateAlloca(
+        ByValType, Arg.getType()->getPointerAddressSpace(), nullptr,
+        Arg.getName() + ".byval");
+    // Users may rely on the byval alignment; the copy must not provide less.
+    const Align ParamAlign = Arg.getParamAlign().valueOrOne();
+    if (ByValAlloca->getAlign() < ParamAlign)
+      ByValAlloca->setAlignment(ParamAlign);
+
+    // Redirect the uses before emitting the memcpy, so that the memcpy itself
+    // keeps reading from the original argument.
+    Arg.replaceAllUsesWith(ByValAlloca);
+    Value *Size = Builder.getInt64(DL.getTypeStoreSize(ByValType));
+    Builder.CreateMemCpy(ByValAlloca, ByValAlloca->getAlign(), &Arg,
+                         Arg.getParamAlign(), Size);
+  }
+}
+
 // Check for instructions that we can recreate on resume as opposed to spill
 // the result into a coroutine frame.
 static bool materializable(Instruction &V) {
@@ -2198,6 +2248,8 @@
   // never has its definition separated from the PHI by the suspend point.
   rewritePHIs(F);
 
+  rewriteByValArguments(F);
+
   // Build suspend crossing info.
   SuspendCrossingInfo Checker(F, Shape);
 
Index: llvm/test/Transforms/Coroutines/coro-alloca-byval-param.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-alloca-byval-param.ll
@@ -0,0 +1,59 @@
+; Check that the pointee of a byval argument used by both the alloc function
+; and the resumed body is stored in the coroutine frame.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+
+; %a is used directly, without a local copy (as it would happen under -O2)
+define i8* @f_direct(i64* byval(i64) align 8 %a) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @myAlloc(i64* %a, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  call void @print2(i64* %a)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; The frame must contain the i64 content of %a rather than a pointer to it.
+; CHECK: %f_direct.Frame = type { void (%f_direct.Frame*)*, void (%f_direct.Frame*)*, i64, i1 }
+
+; %a.byval must be created and initialized with a copy of %a's content.
+; CHECK-LABEL: define i8* @f_direct(i64* byval(i64) align 8 %a)
+; CHECK: %a.byval = alloca i64, align 8
+; CHECK: %0 = bitcast i64* %a.byval to i8*
+; CHECK: %1 = bitcast i64* %a to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false)
+
+; The resume function must access %a.byval through its slot in the frame.
+; CHECK-LABEL: @f_direct.resume(
+; CHECK: %a.byval.reload.addr = getelementptr inbounds %f_direct.Frame, %f_direct.Frame* %FramePtr, i32 0, i32 2
+; CHECK: call void @print2(i64* %a.byval.reload.addr)
+; CHECK: ret void
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @myAlloc(i64*, i32)
+declare double @print(double)
+declare void @print2(i64*)
+declare void @free(i8*)