diff --git a/llvm/test/Transforms/Coroutines/coro-materialize.ll b/llvm/test/Transforms/Coroutines/coro-materialize.ll
--- a/llvm/test/Transforms/Coroutines/coro-materialize.ll
+++ b/llvm/test/Transforms/Coroutines/coro-materialize.ll
@@ -1,6 +1,17 @@
 ; Verifies that we materialize instruction across suspend points
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; See that we only spilled one value for f
+; CHECK: %f.Frame = type { ptr, ptr, i32, i1 }
+; Check other variants where different levels of materialization are achieved
+; CHECK: %f_multiple_remat.Frame = type { ptr, ptr, i32, i32, i32, i1 }
+; CHECK: %f_common_def.Frame = type { ptr, ptr, i32, i32, i32, i1 }
+; CHECK: %f_common_def_multi_result.Frame = type { ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i1 }
+; CHECK-LABEL: @f(
+; CHECK-LABEL: @f_multiple_remat(
+; CHECK-LABEL: @f_common_def(
+; CHECK-LABEL: @f_common_def_multi_result(
+
 
 define ptr @f(i32 %n) presplitcoroutine {
 entry:
   %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
@@ -32,9 +43,122 @@
   ret ptr %hdl
 }
 
-; See that we only spilled one value
-; CHECK: %f.Frame = type { ptr, ptr, i32, i1 }
-; CHECK-LABEL: @f(
+; f_multiple_remat: %inc7 depends on a linear chain of adds (%inc1..%inc6) defined before the first suspend.
+define ptr @f_multiple_remat(i32 %n) presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %inc1 = add i32 %n, 1
+  %inc2 = add i32 %inc1, 2
+  %inc3 = add i32 %inc2, 3
+  %inc4 = add i32 %inc3, 4
+  %inc5 = add i32 %inc4, 5
+  %inc6 = add i32 %inc5, 5
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume1
+                                  i8 1, label %cleanup]
+resume1:
+  %inc7 = add i32 %inc6, 1
+  %sp2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp2, label %suspend [i8 0, label %resume2
+                                  i8 1, label %cleanup]
+
+resume2:
+  call void @print(i32 %inc1)
+  call void @print(i32 %inc7)
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; f_common_def: like f_multiple_remat, but the chain reuses %inc1 as an operand of %inc4 and %inc5.
+define ptr @f_common_def(i32 %n) presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %inc1 = add i32 %n, 1
+  %inc2 = add i32 %inc1, 2
+  %inc3 = add i32 %n, 3
+  %inc4 = add i32 %inc3, %inc1
+  %inc5 = add i32 %inc4, %inc1
+  %inc6 = add i32 %inc5, 5
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume1
+                                  i8 1, label %cleanup]
+resume1:
+  %inc7 = add i32 %inc6, 1
+  %sp2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp2, label %suspend [i8 0, label %resume2
+                                  i8 1, label %cleanup]
+
+resume2:
+  call void @print(i32 %inc1)
+  call void @print(i32 %inc7)
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; f_common_def_multi_result: two add chains share %inc4; %inc11 and %inc12 (derived from %inc7) are printed after the second suspend.
+define ptr @f_common_def_multi_result(i32 %n) presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %inc1 = add i32 %n, 1
+  %inc2 = add i32 %inc1, 2
+  %inc3 = add i32 %n, 3
+  %inc4 = add i32 %inc3, %inc1
+  %inc5 = add i32 %inc4, %inc1
+  %inc6 = add i32 %inc5, 4
+  %inc7 = add i32 %inc6, 5
+  %inc8 = add i32 %inc4, %inc2
+  %inc9 = add i32 %inc8, 5
+  %inc10 = add i32 %inc9, 6
+  %inc11 = add i32 %inc10, 7
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume1
+                                  i8 1, label %cleanup]
+resume1:
+  %inc12 = add i32 %inc7, 1
+  %sp2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp2, label %suspend [i8 0, label %resume2
+                                  i8 1, label %cleanup]
+
+resume2:
+  call void @print(i32 %inc11)
+  call void @print(i32 %inc12)
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
 
 declare ptr @llvm.coro.free(token, ptr)
 declare i32 @llvm.coro.size.i32()
diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-remat.ll b/llvm/test/Transforms/Coroutines/coro-retcon-remat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-remat.ll
@@ -0,0 +1,49 @@
+; Check that a remat that inserts rematerialized instructions in the single predecessor block works
+; as expected
+; RUN: opt < %s -O0 -S | FileCheck %s
+
+; CHECK: %f.Frame = type { i32, i32 }
+
+define { i8*, i32 } @f(i8* %buffer, i32 %n) {
+entry:
+  %id = call token @llvm.coro.id.retcon(i32 8, i32 4, i8* %buffer, i8* bitcast ({ i8*, i32 } (i8*, i1)* @f_prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume1 ]
+  call void @print(i32 %n.val)
+  %inc1 = add i32 %n.val, 1
+  %inc2 = add i32 %inc1, 2
+  %inc3 = add i32 %inc2, 3
+  %inc4 = add i32 %inc3, 4
+  %inc5 = add i32 %inc4, 5
+  %inc6 = add i32 %inc5, 6
+  %unwind0 = call i1 (...) @llvm.coro.suspend.retcon.i1(i32 %inc6)
+  br i1 %unwind0, label %cleanup, label %resume
+
+resume:
+  %unwind1 = call i1 (...) @llvm.coro.suspend.retcon.i1(i32 %inc6)
+  br i1 %unwind1, label %cleanup, label %resume1
+
+resume1:
+  %inc = add i32 %n.val, 1
+  br label %loop
+
+cleanup:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  unreachable
+}
+
+declare token @llvm.coro.id.retcon(i32, i32, i8*, i8*, i8*, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.suspend.retcon.i1(...)
+declare i1 @llvm.coro.end(i8*, i1)
+declare i8* @llvm.coro.prepare.retcon(i8*)
+
+declare { i8*, i32 } @f_prototype(i8*, i1 zeroext)
+
+declare noalias i8* @allocate(i32 %size)
+declare void @deallocate(i8* %ptr)
+
+declare void @print(i32)