diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -948,6 +948,32 @@ The `coro.size` intrinsic is lowered to a constant representing the size of the coroutine frame. +.. _coro.align: + +'llvm.coro.align' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.align.i32() + declare i64 @llvm.coro.align.i64() + +Overview: +""""""""" + +The '``llvm.coro.align``' intrinsic returns the alignment of a `coroutine frame`_. +This is only supported for switched-resume coroutines. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.align` intrinsic is lowered to a constant representing the alignment of +the coroutine frame. + .. _coro.begin: 'llvm.coro.begin' Intrinsic diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -633,6 +633,7 @@ case Intrinsic::coro_end: case Intrinsic::coro_frame: case Intrinsic::coro_size: + case Intrinsic::coro_align: case Intrinsic::coro_suspend: case Intrinsic::coro_subfn_addr: // These intrinsics don't actually represent code after lowering. 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1272,6 +1272,7 @@ def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -599,6 +599,18 @@ } }; +/// This represents the llvm.coro.align instruction. +class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_align; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -104,6 +104,7 @@ CoroBeginInst *CoroBegin; SmallVector<AnyCoroEndInst *, 4> CoroEnds; SmallVector<CoroSizeInst *, 2> CoroSizes; + SmallVector<CoroAlignInst *, 2> CoroAligns; SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends; SmallVector<CallInst*, 2> SwiftErrorOps; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1083,10 +1083,16 @@ 
Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } -static void replaceFrameSize(coro::Shape &Shape) { +static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); + for (CoroAlignInst *CA : Shape.CoroAligns) { + CA->replaceAllUsesWith( + ConstantInt::get(CA->getType(), Shape.FrameAlign.value())); + CA->eraseFromParent(); + } + if (Shape.CoroSizes.empty()) return; @@ -1884,7 +1890,7 @@ simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape); - replaceFrameSize(Shape); + replaceFrameSizeAndAlignment(Shape); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -123,6 +123,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) { // NOTE: Must be sorted! 
static const char *const CoroIntrinsics[] = { + "llvm.coro.align", "llvm.coro.alloc", "llvm.coro.async.context.alloc", "llvm.coro.async.context.dealloc", @@ -268,6 +269,9 @@ case Intrinsic::coro_size: CoroSizes.push_back(cast<CoroSizeInst>(II)); break; + case Intrinsic::coro_align: + CoroAligns.push_back(cast<CoroAlignInst>(II)); + break; case Intrinsic::coro_frame: CoroFrames.push_back(cast<CoroFrameInst>(II)); break; diff --git a/llvm/test/Transforms/Coroutines/coro-align-01.ll b/llvm/test/Transforms/Coroutines/coro-align-01.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-align-01.ll @@ -0,0 +1,54 @@ +; Tests that the coro.align intrinsic could be lowered to correct alignment +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i64 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %align = call i32 @llvm.coro.align.i32() + %alloc = call i8* @aligned_alloc(i32 %align, i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %x.alias = bitcast i64* %x to i32* + call void @capture_call(i32* %x.alias) + %y.alias = bitcast i64* %y to i32* + call void @nocapture_call(i32* %y.alias) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape. 
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 32) +; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]]) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @capture_call(i32*) +declare void @nocapture_call(i32* nocapture) +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-align-02.ll b/llvm/test/Transforms/Coroutines/coro-align-02.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-align-02.ll @@ -0,0 +1,46 @@ +; Tests that the coro.align intrinsic could be lowered to correct alignment +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %align = call i32 @llvm.coro.align.i32() + %alloc = call i8* @aligned_alloc(i32 %align, i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; @f has no allocas, so there is nothing to place in the frame beyond the two function pointers and the trailing i1. 
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 24) +; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]]) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-align-03.ll b/llvm/test/Transforms/Coroutines/coro-align-03.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-align-03.ll @@ -0,0 +1,54 @@ +; Tests that the coro.align intrinsic could be lowered to correct alignment +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i64, align 16 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %align = call i32 @llvm.coro.align.i32() + %alloc = call i8* @aligned_alloc(i32 %align, i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %x.alias = bitcast i64* %x to i32* + call void @capture_call(i32* %x.alias) + %y.alias = bitcast i64* %y to i32* + call void @capture_call(i32* %y.alias) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; %x needs to go 
to the frame since it's escaped; %y is passed to @capture_call as well, so it also goes to the frame. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 16, i32 40) +; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]]) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @capture_call(i32*) +declare void @nocapture_call(i32* nocapture) +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-align-04.ll b/llvm/test/Transforms/Coroutines/coro-align-04.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-align-04.ll @@ -0,0 +1,54 @@ +; Tests that the coro.align intrinsic could be lowered to correct alignment +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i1, align 64 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %align = call i32 @llvm.coro.align.i32() + %alloc = call i8* @aligned_alloc(i32 %align, i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %x.alias = bitcast i1* %x to i32* + call void @capture_call(i32* %x.alias) + %y.alias = bitcast i64* %y to i32* + call void @capture_call(i32* %y.alias) + br label %cleanup + +cleanup: + %mem = call i8* 
@llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; Both %x and %y escape through @capture_call, so both go to the frame; %x is declared with align 64 and the expected frame alignment is 64. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, [39 x i8], i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72) +; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]]) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @capture_call(i32*) +declare void @nocapture_call(i32* nocapture) +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-align-05.ll b/llvm/test/Transforms/Coroutines/coro-align-05.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-align-05.ll @@ -0,0 +1,54 @@ +; Tests that the coro.align intrinsic could be lowered to correct alignment +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i1, align 64 + %y = alloca i64, align 32 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %align = call i32 @llvm.coro.align.i32() + %alloc = call i8* @aligned_alloc(i32 %align, i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %x.alias = 
bitcast i1* %x to i32* + call void @capture_call(i32* %x.alias) + %y.alias = bitcast i64* %y to i32* + call void @capture_call(i32* %y.alias) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; Both %x (align 64) and %y (align 32) escape through @capture_call, so both go to the frame; the expected frame alignment is 64. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, [15 x i8], i64, [24 x i8], i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72) +; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]]) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @capture_call(i32*) +declare void @nocapture_call(i32* nocapture) +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*)