diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -12,9 +12,12 @@ #include "CGCleanup.h" #include "CodeGenFunction.h" -#include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IntrinsicInst.h" +#include using namespace clang; using namespace CodeGen; @@ -75,6 +78,7 @@ // Stores the last emitted coro.free for the deallocate expressions, we use it // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem). llvm::CallInst *LastCoroFree = nullptr; + bool LastCoroFreeUsedForDealloc = false; // If coro.id came from the builtin, remember the expression to give better // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by @@ -412,9 +416,85 @@ } } }; + +// If the coroutine frame is overaligned and only an allocation function +// that does not take `std::align_val_t` is available, the proper alignment +// for the coroutine frame is achieved by allocating more memory than needed and +// dynamically adjusting the frame start address at runtime. +void GrowFrameSize(CodeGenFunction &CGF, llvm::CallInst *CI, bool IsAlloc) { + unsigned CoroSizeIdx = IsAlloc ? 
0 : 1; + CGBuilderTy &Builder = CGF.Builder; + auto OrigIP = Builder.saveIP(); + Builder.SetInsertPoint(CI); + llvm::Function *CoroAlign = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy); + const auto &TI = CGF.CGM.getContext().getTargetInfo(); + unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth(); + auto *AlignCall = Builder.CreateCall(CoroAlign); + auto *AlignOfNewInt = llvm::ConstantInt::get(CGF.SizeTy, AlignOfNew, true); + auto *Diff = Builder.CreateNSWSub(AlignCall, AlignOfNewInt); + auto *NewCoroSize = Builder.CreateAdd(CI->getArgOperand(CoroSizeIdx), Diff); + CI->setArgOperand(CoroSizeIdx, NewCoroSize); + Builder.restoreIP(OrigIP); +} + +void EmitDynamicAlignedDealloc(CodeGenFunction &CGF, + llvm::BasicBlock *AlignedFreeBB, + llvm::CallInst *CoroFree) { + llvm::CallInst *Dealloc = nullptr; + for (llvm::User *U : CoroFree->users()) { + if (auto *CI = dyn_cast(U)) + if (CI->getParent() == CGF.Builder.GetInsertBlock()) + Dealloc = CI; + } + assert(Dealloc); + + CGF.Builder.SetInsertPoint(AlignedFreeBB->getFirstNonPHI()); + + // Replace `coro.free` argument with the address from coroutine frame. + + llvm::Function *RawFramePtrOffsetIntrin = CGF.CGM.getIntrinsic( + llvm::Intrinsic::coro_raw_frame_ptr_offset, CGF.Int32Ty); + auto *RawFramePtrOffset = CGF.Builder.CreateCall(RawFramePtrOffsetIntrin); + auto *FramePtrAddrStart = + CGF.Builder.CreateInBoundsGEP(CoroFree, {RawFramePtrOffset}); + auto *FramePtrAddr = CGF.Builder.CreatePointerCast( + FramePtrAddrStart, CGF.Int8PtrTy->getPointerTo()); + auto *FramePtr = + CGF.Builder.CreateLoad({FramePtrAddr, CGF.getPointerAlign()}); + Dealloc->setArgOperand(0, FramePtr); + + // Match size_t argument with the one used during allocation. + + assert(Dealloc->getNumArgOperands() >= 1); + if (Dealloc->getNumArgOperands() > 1) { + // Size may only be the second argument of allocator call. 
+ if (auto *CoroSize = + dyn_cast(Dealloc->getArgOperand(1))) + if (CoroSize->getIntrinsicID() == llvm::Intrinsic::coro_size) + GrowFrameSize(CGF, Dealloc, /*IsAlloc*/ false); + } + + CGF.Builder.SetInsertPoint(AlignedFreeBB); +} + +void EmitCheckAlignBasicBlock(CodeGenFunction &CGF, + llvm::BasicBlock *CheckAlignBB, + llvm::BasicBlock *AlignBB, + llvm::BasicBlock *NonAlignBB) { + CGF.EmitBlock(CheckAlignBB); + + auto &Builder = CGF.Builder; + auto &TI = CGF.CGM.getContext().getTargetInfo(); + unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *CoroAlign = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy)); + auto *AlignOfNew = llvm::ConstantInt::get(CGF.SizeTy, NewAlign); + auto *Cmp = + Builder.CreateICmp(llvm::CmpInst::ICMP_UGT, CoroAlign, AlignOfNew); + Builder.CreateCondBr(Cmp, AlignBB, NonAlignBB); } -namespace { // Make sure to call coro.delete on scope exit. struct CallCoroDelete final : public EHScopeStack::Cleanup { Stmt *Deallocate; @@ -432,21 +512,33 @@ // call. BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock(); + auto *CheckAlignBB = CGF.createBasicBlock("coro.free.check.align"); + auto *AlignedFreeBB = CGF.createBasicBlock("coro.free.align"); auto *FreeBB = CGF.createBasicBlock("coro.free"); + auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); + + EmitCheckAlignBasicBlock(CGF, CheckAlignBB, AlignedFreeBB, FreeBB); + CGF.EmitBlock(FreeBB); CGF.EmitStmt(Deallocate); - - auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); - CGF.EmitBlock(AfterFreeBB); + CGF.Builder.CreateBr(AfterFreeBB); // We should have captured coro.free from the emission of deallocate. 
auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; + CGF.CurCoro.Data->LastCoroFreeUsedForDealloc = true; if (!CoroFree) { CGF.CGM.Error(Deallocate->getBeginLoc(), "Deallocation expressoin does not refer to coro.free"); return; } + CGF.EmitBlock(AlignedFreeBB); + CGF.EmitStmt(Deallocate); + CGF.CurCoro.Data->LastCoroFreeUsedForDealloc = false; + EmitDynamicAlignedDealloc(CGF, AlignedFreeBB, CoroFree); + + CGF.EmitBlock(AfterFreeBB); + // Get back to the block we were originally and move coro.free there. auto *InsertPt = SaveInsertBlock->getTerminator(); CoroFree->moveBefore(InsertPt); @@ -455,7 +547,7 @@ // Add if (auto *mem = coro.free) Deallocate; auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr); - CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB); + CGF.Builder.CreateCondBr(Cond, CheckAlignBB, AfterFreeBB); // No longer need old terminator. InsertPt->eraseFromParent(); @@ -540,6 +632,51 @@ CGF.EmitStmt(OnFallthrough); } +static llvm::Value *emitAlignUpTo(CodeGenFunction &CGF, llvm::Value *Src, + llvm::Value *Align, const Expr *E) { + auto &Builder = CGF.Builder; + llvm::Type *SrcType = Src->getType(); + llvm::IntegerType *IntType = llvm::IntegerType::get( + CGF.getLLVMContext(), + CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); + + llvm::Value *Alignment = Align; + auto *One = llvm::ConstantInt::get(IntType, 1); + llvm::Value *Mask = Builder.CreateSub(Alignment, One, "mask"); + + llvm::Value *SrcAddr = Builder.CreatePtrToInt(Src, IntType, "intptr"); + llvm::Value *SrcForMask = Builder.CreateAdd(SrcAddr, Mask, "over_boundary"); + llvm::Value *InvertedMask = Builder.CreateNot(Mask, "inverted_mask"); + llvm::Value *Result = + Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); + + Result->setName("aligned_intptr"); + llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff"); + + unsigned addressSpace = cast(SrcType)->getAddressSpace(); + llvm::PointerType 
*destType = CGF.Int8PtrTy; + if (addressSpace) + destType = llvm::Type::getInt8PtrTy(CGF.getLLVMContext(), addressSpace); + Value *Base = Src; + if (SrcType != destType) + Base = Builder.CreateBitCast(Src, destType); + + if (CGF.getLangOpts().isSignedOverflowDefined()) + Result = Builder.CreateGEP(Base, Difference, "aligned_result"); + else + Result = Builder.CreateInBoundsGEP(Base, Difference, "aligned_result"); + Result = Builder.CreatePointerCast(Result, SrcType); + + if (Alignment->getType() != CGF.IntPtrTy) + Alignment = + Builder.CreateIntCast(Alignment, CGF.IntPtrTy, false, "casted.align"); + + Builder.CreateAlignmentAssumption(CGF.CGM.getDataLayout(), Result, Alignment); + + assert(Result->getType() == SrcType); + return Result; +} + void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); auto &TI = CGM.getContext().getTargetInfo(); @@ -547,9 +684,13 @@ auto *EntryBB = Builder.GetInsertBlock(); auto *AllocBB = createBasicBlock("coro.alloc"); + auto *AlignAllocBB = createBasicBlock("coro.alloc.align"); + auto *CheckAlignBB = createBasicBlock("coro.alloc.check.align"); auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); auto *RetBB = createBasicBlock("coro.ret"); + llvm::BasicBlock *RetOnFailureBB = nullptr; + llvm::BasicBlock *AlignAllocBBCont = nullptr; auto *CoroId = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_id), @@ -564,7 +705,9 @@ auto *CoroAlloc = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId}); - Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB); + Builder.CreateCondBr(CoroAlloc, CheckAlignBB, InitBB); + + EmitCheckAlignBasicBlock(*this, CheckAlignBB, AlignAllocBB, AllocBB); EmitBlock(AllocBB); auto *AllocateCall = EmitScalarExpr(S.getAllocate()); @@ -572,10 +715,9 @@ // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. 
if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { - auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); + RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); // See if allocation was successful. - auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); @@ -587,12 +729,55 @@ Builder.CreateBr(InitBB); } + EmitBlock(AlignAllocBB); + auto *AlignedAllocateCall = EmitScalarExpr(S.getAllocate()); + + // The codegen'd IR looks like: + // void *rawFrame = nullptr; + // ... + // if (llvm.coro.alloc()) { + // size_t size = llvm.coro.size(), align = llvm.coro.align(); + // if (align > NEW_ALIGN) { + // size += align - NEW_ALIGN; + // frame = operator new(size); + // rawFrame = frame; + // frame = (frame + align - 1) & ~(align - 1); + // } else { + // frame = operator new(size); + // } + // } + + // size += align - NEW_ALIGN + GrowFrameSize(*this, cast(AlignedAllocateCall), + /*IsAlloc*/ true); + if (S.getReturnStmtOnAllocFailure()) { + auto *Cond = Builder.CreateICmpNE(AlignedAllocateCall, NullPtr); + AlignAllocBBCont = createBasicBlock("coro.alloc.align2"); + assert(RetOnFailureBB); + Builder.CreateCondBr(Cond, AlignAllocBBCont, RetOnFailureBB); + EmitBlock(AlignAllocBBCont); + } + // frame = (frame + align - 1) & ~(align - 1) + auto *CoroAlign = + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::coro_align, SizeTy)); + auto *AlignedUpAddr = + emitAlignUpTo(*this, AlignedAllocateCall, CoroAlign, S.getAllocate()); + // rawFrame = frame + auto *RawFramePtrAddrIntrin = + CGM.getIntrinsic(llvm::Intrinsic::coro_raw_frame_ptr_addr); + auto *RawFramePtrAddr = Builder.CreateCall(RawFramePtrAddrIntrin); + Builder.CreateStore(AlignedAllocateCall, + {RawFramePtrAddr, getPointerAlign()}); + EmitBlock(InitBB); // Pass the result of the allocation to coro.begin. 
- auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + auto *Phi = Builder.CreatePHI(VoidPtrTy, 3); Phi->addIncoming(NullPtr, EntryBB); Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + Phi->addIncoming(AlignedUpAddr, + AlignAllocBBCont ? AlignAllocBBCont : AlignAllocBB); + auto *CoroBegin = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); CurCoro.Data->CoroBegin = CoroBegin; @@ -729,6 +914,10 @@ case llvm::Intrinsic::coro_alloc: case llvm::Intrinsic::coro_begin: case llvm::Intrinsic::coro_free: { + // Make deallocation and aligned deallocation share one `coro.free`. + if (CurCoro.Data && CurCoro.Data->LastCoroFreeUsedForDealloc) + return RValue::get(CurCoro.Data->LastCoroFree); + if (CurCoro.Data && CurCoro.Data->CoroId) { Args.push_back(CurCoro.Data->CoroId); break; diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -57,24 +57,55 @@ extern "C" void f0(global_new_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[NeedAlloc:.+]] = call i1 @llvm.coro.alloc(token %[[ID]]) - // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]] + // CHECK: br i1 %[[NeedAlloc]], label %[[CheckAlignBB:.+]], label %[[InitBB:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[AllocBB:.+]] // CHECK: [[AllocBB]]: + // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK-NEXT: br label %[[InitBB:.+]] + + // CHECK: [[AlignAllocBB]]: // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[ALIGN:.+]] = call i64 
@llvm.coro.align.i64() + // CHECK: %[[PAD:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[PAD]] + // CHECK: %[[MEM2:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) + // CHECK: %[[ALIGN2:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM2]], + // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN2]]) ] + // CHECK: %[[ADDR:.+]] = call i8** @llvm.coro.raw.frame.ptr.addr() + // CHECK: store i8* %[[MEM2]], i8** %[[ADDR]], align 8 // CHECK: br label %[[InitBB]] // CHECK: [[InitBB]]: - // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ] + // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %[[MEM]], %[[AllocBB]] ], [ %[[ALIGNED]], %[[AlignAllocBB]] ] // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]]) // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: %[[NeedDealloc:.+]] = icmp ne i8* %[[MEM]], null - // CHECK: br i1 %[[NeedDealloc]], label %[[FreeBB:.+]], label %[[Afterwards:.+]] + // CHECK: br i1 %[[NeedDealloc]], label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]] // CHECK: [[FreeBB]]: - // CHECK: call void @_ZdlPv(i8* %[[MEM]]) - // CHECK: br label %[[Afterwards]] + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] + + // CHECK: [[AlignedFreeBB]]: + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) 
+ // CHECK-NEXT: br label %[[Afterwards]] // CHECK: [[Afterwards]]: // CHECK: ret void @@ -157,6 +188,7 @@ // CHECK-LABEL: f1b( extern "C" void f1b(promise_matching_global_placement_new_tag, dummy *) { // CHECK: call noalias nonnull i8* @_Znwm(i64 + // CHECK-NOT: call noalias nonnull i8* @_ZnwmSt11align_val_t(i64 co_return; } @@ -182,6 +214,7 @@ // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]]) + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* co_return; } @@ -206,8 +239,24 @@ // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) + // CHECK: call i64 @llvm.coro.align.i64() + // CHECK: br i1 {{.*}}, label %[[AlignFreeBB:.+]], label %[[FreeBB:.+]] + + // CHECK: [[FreeBB]]: // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) + + // CHECK: [[AlignFreeBB]]: + // CHECK: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM]], i32 %[[OFFSET]] + // CHECK: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK: %[[MEM2:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK: %[[SIZE2:.+]] = add i64 %[[SIZE]], %[[DIFF]] + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM2]], i64 %[[SIZE2]]) + co_return; } @@ -229,16 +278,41 @@ // CHECK: %[[RetVal:.+]] = alloca i32 // CHECK: %[[Gro:.+]] = 
alloca i32 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[OKBB:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[AllocBB:.+]] + + // CHECK: [[AllocBB]]: // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null - // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] + // CHECK: br i1 %[[OK]], label %[[OKBB]], label %[[ERRBB:.+]] // CHECK: [[ERRBB]]: // CHECK: %[[FailRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type39get_return_object_on_allocation_failureEv( // CHECK: store i32 %[[FailRet]], i32* %[[RetVal]] // CHECK: br label %[[RetBB:.+]] + // CHECK: [[AlignAllocBB]]: + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[PAD:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[PAD]] + // CHECK: %[[MEM2:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) + // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM2]], null + // CHECK: br i1 %[[OK]], label %[[AlignAllocBBCont:.+]], label %[[ERRBB:.+]] + + // CHECK: [[AlignAllocBBCont]]: + // CHECK: %[[ALIGN2:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM2]], + // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN2]]) ] + // CHECK: %[[ADDR:.+]] = call i8** @llvm.coro.raw.frame.ptr.addr() + // CHECK: store i8* %[[MEM2]], i8** %[[ADDR]], align 8 + // 
CHECK: br label %[[OKBB]] + // CHECK: [[OKBB]]: // CHECK: %[[OkRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv( // CHECK: store i32 %[[OkRet]], i32* %[[Gro]] diff --git a/clang/test/CodeGenCoroutines/coro-cleanup.cpp b/clang/test/CodeGenCoroutines/coro-cleanup.cpp --- a/clang/test/CodeGenCoroutines/coro-cleanup.cpp +++ b/clang/test/CodeGenCoroutines/coro-cleanup.cpp @@ -78,12 +78,46 @@ // CHECK: [[Cleanup]]: // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev( - // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem0]] + // CHECK: %[[MEM0:.+]] = call i8* @llvm.coro.free( + // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], + // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]] + + // CHECK: [[FreeBB]]: + // CHECK: call void @_ZdlPv(i8* %[[MEM0]] + // CHECK: br label %[[Afterwards]] + + // CHECK: [[AlignedFreeBB]]: + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM0]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] // CHECK: [[Dealloc]]: - // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + // CHECK: %[[MEM0:.+]] = call i8* @llvm.coro.free( + // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], + // CHECK: br i1 %[[CMP]], label 
%[[AlignedFreeBB:.+]], label %[[FreeBB:.+]] + + // CHECK: [[FreeBB]]: + // CHECK: call void @_ZdlPv(i8* %[[MEM0]] + // CHECK: br label %[[Afterwards]] + + // CHECK: [[AlignedFreeBB]]: + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM0]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -68,6 +68,7 @@ // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev( // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + // CHECK: call void @_ZdlPv(i8* %{{.*}}) // Initialize retval from Gro and destroy Gro diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -123,6 +123,17 @@ to the coroutine object. The frontend should always resume or destroy the coroutine using the corresponding intrinsics. +**raw frame**: When the required coroutine frame alignment is greater than +__STDCPP_DEFAULT_NEW_ALIGNMENT__, more space than the size of the coroutine frame +needs to be allocated to satisfy the alignment requirement of the coroutine frame. +In this case, the memory address returned by the memory allocator is different +from the coroutine frame start address. The memory address returned by the memory allocator is called the "raw frame pointer". The coroutine frame start address +is at a non-negative offset from the "raw frame pointer". The maximal gap between the +two is `llvm.coro.align() - __STDCPP_DEFAULT_NEW_ALIGNMENT__`, whereas the +actual gap is a runtime property. 
When a coroutine frame is overaligned, the +"raw frame pointer" may be stored in the coroutine frame and it could be +retrieved using `llvm.coro.raw.frame.ptr.*` intrinsics. + Returned-Continuation Lowering ------------------------------ @@ -948,6 +959,90 @@ The `coro.size` intrinsic is lowered to a constant representing the size of the coroutine frame. +.. _coro.align: + +'llvm.coro.align' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.align.i32() + declare i64 @llvm.coro.align.i64() + +Overview: +""""""""" + +The '``llvm.coro.align``' intrinsic returns the alignment of the coroutine frame +in bytes. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.align` intrinsic is lowered to a constant representing the alignment +of the coroutine frame. + +.. _coro.raw.frame.ptr.offset: + +'llvm.coro.raw.frame.ptr.offset' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.raw.frame.ptr.offset.i32() + declare i64 @llvm.coro.raw.frame.ptr.offset.i64() + +Overview: +""""""""" + +The '``llvm.coro.raw.frame.ptr.offset``' intrinsic returns the byte offset of +the `raw frame pointer` in the coroutine frame. This is only supported for +switched-resume coroutines. The return value is undefined when the coroutine +frame is not overaligned. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.raw.frame.ptr.offset` intrinsic is lowered to a constant representing +the byte offset of the `raw frame pointer` in the coroutine frame. The `raw frame pointer` +is the pointer returned by the allocator for the coroutine frame. The address +returned by `llvm.coro.begin` is at a non-negative offset from the `raw frame pointer`. +The return value is undefined when the coroutine frame is not overaligned. + +.. 
_coro.raw.frame.ptr.addr: + +'llvm.coro.raw.frame.ptr.addr' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i8** @llvm.coro.raw.frame.ptr.addr() + +Overview: +""""""""" + +The '``llvm.coro.raw.frame.ptr.addr``' intrinsic returns the address storing the +`raw frame pointer` in the coroutine frame. This is only supported for +switched-resume coroutines. The return value is undefined when the coroutine +frame is not overaligned. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.raw.frame.ptr.addr` intrinsic is lowered to the address of a +coroutine frame field storing the `raw frame pointer`. + .. _coro.begin: 'llvm.coro.begin' Intrinsic @@ -974,11 +1069,7 @@ Semantics: """""""""" -Depending on the alignment requirements of the objects in the coroutine frame -and/or on the codegen compactness reasons the pointer returned from `coro.begin` -may be at offset to the `%mem` argument. (This could be beneficial if -instructions that express relative access to data can be more compactly encoded -with small positive and negative offsets). +`coro.begin` returns its second argument. A frontend should emit exactly one `coro.begin` intrinsic per coroutine. 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1265,6 +1265,9 @@ def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_raw_frame_ptr_offset : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_raw_frame_ptr_addr : Intrinsic<[llvm_ptrptr_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/StackLifetime.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -531,6 +532,8 @@ return StructAlign; } + SmallVector &getFields() { return Fields; } + FieldIDType getLayoutFieldIndex(FieldIDType Id) const { assert(IsFinished && "not yet finished!"); return Fields[Id].LayoutFieldIndex; @@ -1126,21 +1129,59 @@ // Because multiple allocas may own the same field slot, // we add allocas to field here. B.addFieldForAllocas(F, FrameData, Shape); - // Add PromiseAlloca to Allocas list so that - // 1. updateLayoutIndex could update its index after - // `performOptimizedStructLayout` - // 2. it is processed in insertSpills. - if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) - // We assume that the promise alloca won't be modified before - // CoroBegin and no alias will be create before CoroBegin. 
- FrameData.Allocas.emplace_back( - PromiseAlloca, DenseMap>{}, false); + // Create an entry for every spilled value. for (auto &S : FrameData.Spills) { FieldIDType Id = B.addField(S.first->getType(), None); FrameData.setFieldIndex(S.first, Id); } + Optional FramePtrField = None; + if (Shape.ABI == coro::ABI::Switch) { + // Add PromiseAlloca to Allocas list so that + // 1. updateLayoutIndex could update its index after + // `performOptimizedStructLayout` + // 2. it is processed in insertSpills. + if (PromiseAlloca) + // We assume that the promise alloca won't be modified before + // CoroBegin and no alias will be create before CoroBegin. + FrameData.Allocas.emplace_back( + PromiseAlloca, DenseMap>{}, + false); + + Align FrameAlign = + std::max_element( + B.getFields().begin(), B.getFields().end(), + [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; }) + ->Alignment; + + // Check for over-alignment. + Value *PtrAddr = + ConstantPointerNull::get(Type::getInt8PtrTy(C)->getPointerTo()); + unsigned NewAlign = Shape.getSwitchCoroId()->getAlignment(); + bool NeedFramePtrField = Shape.CoroRawFramePtrOffsets.size() > 0 || + Shape.CoroRawFramePtrAddrs.size() > 0; + if (NeedFramePtrField && NewAlign && FrameAlign > NewAlign) { + BasicBlock &Entry = F.getEntryBlock(); + IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt()); + + // Reserve frame space for raw frame pointer. 
+ Value *Mem = Shape.CoroBegin->getMem(); + AllocaInst *FramePtrAddr = + Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr"); + PtrAddr = FramePtrAddr; + FramePtrField = B.addFieldForAlloca(FramePtrAddr); + FrameData.setFieldIndex(FramePtrAddr, *FramePtrField); + FrameData.Allocas.emplace_back( + FramePtrAddr, DenseMap>{}, true); + } + + for (CoroRawFramePtrAddrInst *C : Shape.CoroRawFramePtrAddrs) { + C->replaceAllUsesWith(PtrAddr); + C->eraseFromParent(); + } + } + B.finish(FrameTy); FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); @@ -1154,6 +1195,12 @@ Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value(); Shape.SwitchLowering.IndexOffset = IndexField.Offset; + if (FramePtrField) { + FieldIDType FieldIdx = B.getLayoutFieldIndex(*FramePtrField); + Shape.SwitchLowering.FramePtrOffset = + DL.getStructLayout(FrameTy)->getElementOffset(FieldIdx); + } + // Also round the frame size up to a multiple of its alignment, as is // generally expected in C/C++. Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -121,6 +122,10 @@ : cast(Arg->stripPointerCasts()); } + unsigned getAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + void clearPromise() { Value *Arg = getArgOperand(PromiseArg); setArgOperand(PromiseArg, @@ -599,6 +604,42 @@ } }; +/// This represents the llvm.coro.align instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_align; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.raw.frame.ptr.offset instruction. +class LLVM_LIBRARY_VISIBILITY CoroRawFramePtrOffsetInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_raw_frame_ptr_offset; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.raw.frame.ptr.addr instruction. +class LLVM_LIBRARY_VISIBILITY CoroRawFramePtrAddrInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_raw_frame_ptr_addr; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -99,6 +99,9 @@ CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; + SmallVector CoroAligns; + SmallVector CoroRawFramePtrOffsets; + SmallVector CoroRawFramePtrAddrs; SmallVector CoroSuspends; SmallVector SwiftErrorOps; @@ -135,6 +138,7 @@ unsigned IndexField; unsigned IndexAlign; unsigned IndexOffset; + unsigned FramePtrOffset; bool HasFinalSuspend; }; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1046,23 +1046,44 @@ Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } -static void replaceFrameSize(coro::Shape &Shape) { +static void replaceFrameSizeAndAlign(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); - if (Shape.CoroSizes.empty()) - return; + if (!Shape.CoroSizes.empty()) { + // In the same function all coro.sizes should have the same result type. + auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + + for (CoroSizeInst *CS : Shape.CoroSizes) { + CS->replaceAllUsesWith(SizeConstant); + CS->eraseFromParent(); + } + } - // In the same function all coro.sizes should have the same result type. 
- auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + if (!Shape.CoroAligns.empty()) { + auto *Intrin = Shape.CoroAligns.back(); + auto *AlignConstant = + ConstantInt::get(Intrin->getType(), Shape.FrameAlign.value()); - for (CoroSizeInst *CS : Shape.CoroSizes) { - CS->replaceAllUsesWith(SizeConstant); - CS->eraseFromParent(); + for (CoroAlignInst *CS : Shape.CoroAligns) { + CS->replaceAllUsesWith(AlignConstant); + CS->eraseFromParent(); + } + } + + if (!Shape.CoroRawFramePtrOffsets.empty()) { + auto *Intrin = Shape.CoroRawFramePtrOffsets.back(); + auto *FramePtrOffset = ConstantInt::get( + Intrin->getType(), Shape.SwitchLowering.FramePtrOffset); + + for (CoroRawFramePtrOffsetInst *CS : Shape.CoroRawFramePtrOffsets) { + CS->replaceAllUsesWith(FramePtrOffset); + CS->eraseFromParent(); + } } } @@ -1798,7 +1819,7 @@ simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape); - replaceFrameSize(Shape); + replaceFrameSizeAndAlign(Shape); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. 
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -234,6 +234,9 @@ Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); Shape.CoroSizes.clear(); + Shape.CoroAligns.clear(); + Shape.CoroRawFramePtrOffsets.clear(); + Shape.CoroRawFramePtrAddrs.clear(); Shape.CoroSuspends.clear(); Shape.FrameTy = nullptr; @@ -268,6 +271,15 @@ case Intrinsic::coro_size: CoroSizes.push_back(cast(II)); break; + case Intrinsic::coro_align: + CoroAligns.push_back(cast(II)); + break; + case Intrinsic::coro_raw_frame_ptr_offset: + CoroRawFramePtrOffsets.push_back(cast(II)); + break; + case Intrinsic::coro_raw_frame_ptr_addr: + CoroRawFramePtrAddrs.push_back(cast(II)); + break; case Intrinsic::coro_frame: CoroFrames.push_back(cast(II)); break; @@ -375,6 +387,7 @@ this->SwitchLowering.ResumeSwitch = nullptr; this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); this->SwitchLowering.ResumeEntryBlock = nullptr; + this->SwitchLowering.FramePtrOffset = 0; for (auto AnySuspend : CoroSuspends) { auto Suspend = dyn_cast(AnySuspend); diff --git a/llvm/test/Transforms/Coroutines/coro-frame-overalign.ll b/llvm/test/Transforms/Coroutines/coro-frame-overalign.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-frame-overalign.ll @@ -0,0 +1,78 @@ +; Check that `llvm.coro.align`, `llvm.coro.raw.frame.ptr.offset` and +; `llvm.coro.raw.frame.ptr.addr` are lowered correctly.
+; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +%PackedStruct = type <{ i64 }> + +declare void @consume(%PackedStruct*, i32, i32, i8**) +declare void @consume2(i32, i32) + +define i8* @f() "coroutine.presplit"="1" { +entry: + %data = alloca %PackedStruct, align 32 + %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %align = call i32 @llvm.coro.align.i32() + %offset = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + %addr = call i8** @llvm.coro.raw.frame.ptr.addr() + call void @consume(%PackedStruct* %data, i32 %align, i32 %offset, i8** %addr) + %0 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %0, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + br label %cleanup + +cleanup: + %align2 = call i32 @llvm.coro.align.i32() + %offset2 = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + call void @consume2(i32 %align2, i32 %offset2) + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; See if the raw frame pointer was inserted into the frame. +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i8*, i1, [7 x i8], %PackedStruct } + +; See if we used correct index to access frame addr field (field 2). 
+; CHECK-LABEL: @f( +; CHECK: %alloc.frame.ptr = alloca i8*, align 8 +; CHECK: %[[FIELD:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK: %[[ADDR:.+]] = load i8*, i8** %alloc.frame.ptr, align 8 +; CHECK: store i8* %[[ADDR]], i8** %[[FIELD]], align 8 +; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK: call void @consume(%PackedStruct* %[[DATA]], i32 32, i32 16, i8** %[[FIELD]]) +; CHECK: ret i8* + +; See if `llvm.coro.align` and `llvm.coro.raw.frame.ptr.offset` are lowered +; correctly during deallocation. +; CHECK-LABEL: @f.destroy( +; CHECK: call void @consume2(i32 32, i32 16) +; CHECK: call void @free(i8* %{{.*}}) + +; CHECK-LABEL: @f.cleanup( +; CHECK: call void @consume2(i32 32, i32 16) +; CHECK: call void @free(i8* + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i32 @llvm.coro.raw.frame.ptr.offset.i32() +declare i8** @llvm.coro.raw.frame.ptr.addr() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare double @print(double) +declare void @free(i8*)