This patch adds a new coroutine intrinsic:
llvm.coro.sizeof.i32(i8* %func)
It has the same semantics as llvm.coro.size except that
it can be called from outside the coroutine itself.
This allows the caller to be in control of memory allocation
and deallocation, which is required for the Zig frontend.
With this, the caller would ask for the frame size, allocate
the memory, potentially handle allocation failure, and then
decide whether to actually call the coroutine.
It then does not need to call the llvm.coro.destroy intrinsic;
instead it can simply free the memory. This is useful,
for example, when the caller decides to use an arena allocator
to create N coroutines, and then free them all at once by
destroying the arena.
Here is ex0.ll updated to use this approach, with the caller allocating and freeing the coroutine frame:
define i8* @f(i32 %n, i8* %alloc) { entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) br label %loop loop: %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ] call void @print(i32 %n.val) %0 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %0, label %suspend [i8 0, label %resume] resume: %inc = add i32 %n.val, 1 br label %loop suspend: call i1 @llvm.coro.end(i8* %hdl, i1 0) ret i8* %hdl } define i32 @main() { entry: %size = call i32 @llvm.coro.sizeof.i32(i8* bitcast (i8* (i32, i8*)* @f to i8*)) %alloc = call i8* @malloc(i32 %size) %hdl = call i8* @f(i32 4, i8* %alloc) call void @llvm.coro.resume(i8* %hdl) call void @llvm.coro.resume(i8* %hdl) call void @free(i8* %alloc) ret i32 0 } declare token @llvm.coro.id(i32, i8*, i8*, i8*) declare i32 @llvm.coro.sizeof.i32(i8 *) declare i8 @llvm.coro.suspend(token, i1) declare void @llvm.coro.resume(i8*) declare i8* @llvm.coro.begin(token, i8*) declare i1 @llvm.coro.end(i8*, i1) declare noalias i8* @malloc(i32) declare void @print(i32) declare void @free(i8*)
Here is the output of running it through opt ex0.ll -enable-coroutines -O2 -S with this patch applied:
%f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 } @f.resumers = private constant [3 x void (%f.Frame*)*] [void (%f.Frame*)* @f.resume, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup] define i8* @f(i32 %n, i8* %alloc) { entry: %resume.addr = bitcast i8* %alloc to void (%f.Frame*)** store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr, align 8 %destroy.addr = getelementptr inbounds i8, i8* %alloc, i64 8 %0 = bitcast i8* %destroy.addr to void (%f.Frame*)** store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %0, align 8 %n.val.spill.addr = getelementptr inbounds i8, i8* %alloc, i64 20 %1 = bitcast i8* %n.val.spill.addr to i32* store i32 %n, i32* %1, align 4 tail call void @print(i32 %n) %index.addr1 = getelementptr inbounds i8, i8* %alloc, i64 17 %2 = bitcast i8* %index.addr1 to i1* store i1 false, i1* %2, align 1 ret i8* %alloc } define i32 @main() local_unnamed_addr { entry: %alloc = tail call i8* @malloc(i32 0) %resume.addr.i = bitcast i8* %alloc to void (%f.Frame*)** store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr.i, align 8 %destroy.addr.i = getelementptr inbounds i8, i8* %alloc, i64 8 %0 = bitcast i8* %destroy.addr.i to void (%f.Frame*)** store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %0, align 8 %n.val.spill.addr.i = getelementptr inbounds i8, i8* %alloc, i64 20 %1 = bitcast i8* %n.val.spill.addr.i to i32* store i32 4, i32* %1, align 4 tail call void @print(i32 4) %index.addr1.i = getelementptr inbounds i8, i8* %alloc, i64 17 %2 = bitcast i8* %index.addr1.i to i1* store i1 false, i1* %2, align 1 store i32 5, i32* %1, align 4 tail call void @print(i32 5) store i1 false, i1* %2, align 1 store i32 6, i32* %1, align 4 tail call void @print(i32 6) store i1 false, i1* %2, align 1 tail call void @free(i8* %alloc) ret i32 0 } ; Function Attrs: argmemonly nounwind readonly declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #0 ; Function Attrs: nounwind declare i8* 
@llvm.coro.begin(token, i8* writeonly) #1 ; Function Attrs: nounwind declare noalias i8* @malloc(i32) local_unnamed_addr #1 declare void @print(i32) local_unnamed_addr ; Function Attrs: nounwind declare void @free(i8* nocapture) local_unnamed_addr #1 define internal fastcc void @f.resume(%f.Frame* %FramePtr) { entry.resume: %n.val.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i64 0, i32 4 %n.val.reload1 = load i32, i32* %n.val.reload.addr, align 4 %inc2 = add i32 %n.val.reload1, 1 %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i64 0, i32 3 store i32 %inc2, i32* %n.val.reload.addr, align 4 tail call void @print(i32 %inc2) store i1 false, i1* %index.addr1, align 1 ret void } define internal fastcc void @f.destroy(%f.Frame* %FramePtr) { entry.destroy: ret void } define internal fastcc void @f.cleanup(%f.Frame* %FramePtr) { entry.cleanup: ret void } attributes #0 = { argmemonly nounwind readonly } attributes #1 = { nounwind }
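It seems the CoroElide pass was not able to elide the heap allocation: the calls to malloc and free are still present in main.
This is perhaps because, with this strategy, the caller does not have a token to pass
to llvm.coro.alloc. Maybe a way for the caller to obtain such a token could be added?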
I would like some advice on where to go from here.