diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -948,6 +948,60 @@ The `coro.size` intrinsic is lowered to a constant representing the size of the coroutine frame. +.. _coro.align: + +'llvm.coro.align' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.align.i32() + declare i64 @llvm.coro.align.i64() + +Overview: +""""""""" + +The '``llvm.coro.align``' intrinsic returns the alignment of the coroutine frame +in bytes. This is only supported for switched-resume coroutines. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.align` intrinsic is lowered to a constant representing the alignment +of the coroutine frame. + +.. _coro.raw.frame.ptr.offset: + +'llvm.coro.raw.frame.ptr.offset' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.raw.frame.ptr.offset.i32() + declare i64 @llvm.coro.raw.frame.ptr.offset.i64() + +Overview: +""""""""" + +The '``llvm.coro.raw.frame.ptr.offset``' intrinsic returns the byte offset of +the raw memory block address (returned by the allocator) in the coroutine frame. +The returned value is only meaningful when the coroutine frame is overaligned. +This is only supported for switched-resume coroutines. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.raw.frame.ptr.offset` intrinsic is lowered to a constant representing +the byte offset of the raw memory block address in the coroutine frame. + ..
_coro.begin: 'llvm.coro.begin' Intrinsic diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1241,6 +1241,8 @@ def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_raw_frame_ptr_offset : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -492,6 +492,8 @@ return StructAlign; } + SmallVector &getFields() { return Fields; } + FieldIDType getLayoutFieldIndex(FieldIDType Id) const { assert(IsFinished && "not yet finished!"); return Fields[Id].LayoutFieldIndex; @@ -770,21 +772,49 @@ // Because multiple allocas may own the same field slot, // we add allocas to field here. B.addFieldForAllocas(F, FrameData, Shape); - // Add PromiseAlloca to Allocas list so that - // 1. updateLayoutIndex could update its index after - // `performOptimizedStructLayout` - // 2. it is processed in insertSpills. - if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) - // We assume that the promise alloca won't be modified before - // CoroBegin and no alias will be create before CoroBegin. - FrameData.Allocas.emplace_back( - PromiseAlloca, DenseMap>{}, false); + // Create an entry for every spilled value. 
for (auto &S : FrameData.Spills) { FieldIDType Id = B.addField(S.first->getType(), None); FrameData.setFieldIndex(S.first, Id); } + Optional FramePtrField = None; + if (Shape.ABI == coro::ABI::Switch) { + // Add PromiseAlloca to Allocas list so that + // 1. updateLayoutIndex could update its index after + // `performOptimizedStructLayout` + // 2. it is processed in insertSpills. + if (PromiseAlloca) + // We assume that the promise alloca won't be modified before + // CoroBegin and no alias will be create before CoroBegin. + FrameData.Allocas.emplace_back( + PromiseAlloca, DenseMap>{}, + false); + + Align FrameAlign = + std::max_element( + B.getFields().begin(), B.getFields().end(), + [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; }) + ->Alignment; + + // Check for over-alignment. + unsigned NewAlign = Shape.getSwitchCoroId()->getAlignment(); + if (NewAlign && FrameAlign > NewAlign) { + BasicBlock &Entry = F.getEntryBlock(); + IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt()); + + // Reserve frame space for raw frame pointer. + Value *Mem = Shape.CoroBegin->getMem(); + AllocaInst *FramePtrAddr = + Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr"); + FramePtrField = B.addFieldForAlloca(FramePtrAddr); + FrameData.setFieldIndex(FramePtrAddr, *FramePtrField); + FrameData.Allocas.emplace_back( + FramePtrAddr, DenseMap>{}, true); + } + } + B.finish(FrameTy); FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); @@ -796,6 +826,12 @@ Shape.SwitchLowering.IndexField = B.getLayoutFieldIndex(*SwitchIndexFieldId); + if (FramePtrField) { + FieldIDType FieldIdx = B.getLayoutFieldIndex(*FramePtrField); + Shape.SwitchLowering.FramePtrOffset = + DL.getStructLayout(FrameTy)->getElementOffset(FieldIdx); + } + // Also round the frame size up to a multiple of its alignment, as is // generally expected in C/C++. 
Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -121,6 +122,11 @@ : cast(Arg->stripPointerCasts()); } + /// Returns the constant alignment operand (AlignArg) of this call. + unsigned getAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue(); + } + void clearPromise() { Value *Arg = getArgOperand(PromiseArg); setArgOperand(PromiseArg, @@ -599,6 +605,30 @@ } }; +/// This represents the llvm.coro.align instruction. +class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_align; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.raw.frame.ptr.offset instruction.
+class LLVM_LIBRARY_VISIBILITY CoroRawFramePtrOffsetInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_raw_frame_ptr_offset; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -99,6 +99,8 @@ CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; + SmallVector CoroAligns; + SmallVector CoroRawFramePtrOffsets; SmallVector CoroSuspends; SmallVector SwiftErrorOps; @@ -133,6 +135,7 @@ AllocaInst *PromiseAlloca; BasicBlock *ResumeEntryBlock; unsigned IndexField; + unsigned FramePtrOffset; bool HasFinalSuspend; }; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1024,23 +1024,46 @@ Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } -static void replaceFrameSize(coro::Shape &Shape) { +/// Replace the llvm.coro.size, llvm.coro.align and +/// llvm.coro.raw.frame.ptr.offset calls collected in \p Shape with constants +/// describing the final coroutine frame, then erase the calls. +static void replaceFrameSizeAndAlign(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); - if (Shape.CoroSizes.empty()) - return; + if (!Shape.CoroSizes.empty()) { + // In the same function all coro.sizes should have the same result type.
+ auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + + for (CoroSizeInst *CS : Shape.CoroSizes) { + CS->replaceAllUsesWith(SizeConstant); + CS->eraseFromParent(); + } + } - // In the same function all coro.sizes should have the same result type. - auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + // coro.align is overloaded on its result type, so give every call a + // constant of its own type instead of assuming that all calls agree. + for (CoroAlignInst *CA : Shape.CoroAligns) { + CA->replaceAllUsesWith( + ConstantInt::get(CA->getType(), Shape.FrameAlign.value())); + CA->eraseFromParent(); + } - for (CoroSizeInst *CS : Shape.CoroSizes) { - CS->replaceAllUsesWith(SizeConstant); - CS->eraseFromParent(); + if (!Shape.CoroRawFramePtrOffsets.empty()) { + // FramePtrOffset is part of the switch-lowering state; use 0 for any + // other ABI instead of reading it. + unsigned RawPtrOffset = 0; + if (Shape.ABI == coro::ABI::Switch) + RawPtrOffset = Shape.SwitchLowering.FramePtrOffset; + + for (CoroRawFramePtrOffsetInst *RO : Shape.CoroRawFramePtrOffsets) { + RO->replaceAllUsesWith(ConstantInt::get(RO->getType(), RawPtrOffset)); + RO->eraseFromParent(); + } } } @@ -1776,7 +1799,7 @@ simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape); - replaceFrameSize(Shape); + replaceFrameSizeAndAlign(Shape); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed.
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -234,6 +234,8 @@ Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); Shape.CoroSizes.clear(); + Shape.CoroAligns.clear(); + Shape.CoroRawFramePtrOffsets.clear(); Shape.CoroSuspends.clear(); Shape.FrameTy = nullptr; @@ -268,6 +270,12 @@ case Intrinsic::coro_size: CoroSizes.push_back(cast(II)); break; + case Intrinsic::coro_align: + CoroAligns.push_back(cast(II)); + break; + case Intrinsic::coro_raw_frame_ptr_offset: + CoroRawFramePtrOffsets.push_back(cast(II)); + break; case Intrinsic::coro_frame: CoroFrames.push_back(cast(II)); break; @@ -375,6 +383,7 @@ this->SwitchLowering.ResumeSwitch = nullptr; this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); this->SwitchLowering.ResumeEntryBlock = nullptr; + this->SwitchLowering.FramePtrOffset = 0; for (auto AnySuspend : CoroSuspends) { auto Suspend = dyn_cast(AnySuspend); diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll @@ -62,10 +62,10 @@ call i1 @llvm.coro.end(i8* null, i1 false) ret void } -; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", %struct.big_structure, i1, [26 x i8], %struct.big_structure.2 } +; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", %struct.big_structure, i1, i8*, [16 x i8], %struct.big_structure.2 } ; CHECK-LABEL: @a.resume( ; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 3 -; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 6 +; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* 
%FramePtr, i32 0, i32 7 declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) declare i1 @llvm.coro.alloc(token) #3 diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll @@ -62,7 +62,7 @@ call i1 @llvm.coro.end(i8* null, i1 false) ret void } -; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", i1, [14 x i8], %struct.big_structure } +; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", i1, i8*, %struct.big_structure } ; CHECK-LABEL: @a.resume( ; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 3 ; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 5