diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4433,7 +4433,7 @@ auto & Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); + Function *F = CGM.getIntrinsic(Intrinsic::coro_size_aligned, T); return RValue::get(Builder.CreateCall(F)); } diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -60,7 +60,7 @@ // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]] // CHECK: [[AllocBB]]: - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) // CHECK: br label %[[InitBB]] @@ -97,7 +97,7 @@ // CHECK-LABEL: f1( extern "C" void f1(promise_new_tag ) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( @@ -126,7 +126,7 @@ // CHECK: store float %y, float* %y.addr, align 4 // CHECK: store double %z, double* %z.addr, align 8 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4 // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4 // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8 @@ -176,7 +176,7 @@ // CHECK-LABEL: f2( extern "C" void 
f2(promise_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( @@ -201,12 +201,12 @@ // CHECK-LABEL: f3( extern "C" void f3(promise_sized_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) - // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) co_return; } @@ -229,7 +229,7 @@ // CHECK: %[[RetVal:.+]] = alloca i32 // CHECK: %[[Gro:.+]] = alloca i32 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] diff --git a/clang/test/CodeGenCoroutines/coro-builtins.c b/clang/test/CodeGenCoroutines/coro-builtins.c --- a/clang/test/CodeGenCoroutines/coro-builtins.c +++ b/clang/test/CodeGenCoroutines/coro-builtins.c @@ -20,7 +20,7 @@ // CHECK-NEXT: call i8* @llvm.coro.noop() __builtin_coro_noop(); - // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK-NEXT: %[[SIZE:.+]] = call i64 
@llvm.coro.size.aligned.i64() // CHECK-NEXT: %[[MEM:.+]] = call i8* @myAlloc(i64 %[[SIZE]]) // CHECK-NEXT: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[COROID]], i8* %[[MEM]]) __builtin_coro_begin(myAlloc(__builtin_coro_size())); diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -48,7 +48,7 @@ // CHECK: %[[RetVal:.+]] = alloca i32 // CHECK: %[[GroActive:.+]] = alloca i1 - // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.aligned.i64() // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]]) // CHECK: store i1 false, i1* %[[GroActive]] // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev( diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -948,6 +948,35 @@ The `coro.size` intrinsic is lowered to a constant representing the size of the coroutine frame. +.. _coro.size.aligned: + +'llvm.coro.size.aligned' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.size.aligned.i32() + declare i64 @llvm.coro.size.aligned.i64() + +Overview: +""""""""" + +The '``llvm.coro.size.aligned``' intrinsic returns the number of bytes +allocated by a memory allocator to store a `coroutine frame`_. It is always +greater than or equal to '``llvm.coro.size``'. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +Using this intrinsic indicates to LLVM that it should handle an overaligned +`coroutine frame`_ by requesting more memory than needed to store a +`coroutine frame`_ to satisfy its memory alignment requirement. This is only +supported for switched-resume coroutines. + .. 
_coro.begin: 'llvm.coro.begin' Intrinsic diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1237,6 +1237,7 @@ def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_size_aligned : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -14,17 +14,21 @@ // the value into the coroutine frame. //===----------------------------------------------------------------------===// +#include "CoroInstr.h" #include "CoroInternal.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -486,6 +490,8 @@ return StructAlign; } + SmallVector &getFields() { return Fields; } + FieldIDType getLayoutFieldIndex(FieldIDType Id) const { assert(IsFinished && "not yet finished!"); return Fields[Id].LayoutFieldIndex; @@ -710,6 +716,54 @@ IsFinished = true; } +// Adapted from CodeGenFunction::EmitBuiltinAlignTo. 
+static Value *emitAlignUpTo(IRBuilder<> &Builder, Value *Src, uint64_t Align) { + const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); + + auto *SrcType = cast(Src->getType()); + IntegerType *IntType = IntegerType::get(Builder.getContext(), + DL.getIndexTypeSizeInBits(SrcType)); + Value *Alignment = ConstantInt::get(IntType, Align); + auto *One = ConstantInt::get(IntType, 1); + Value *Mask = Builder.CreateSub(Alignment, One, "mask"); + Value *SrcAddr = Builder.CreatePtrToInt(Src, IntType, "intptr"); + + // When aligning up we have to first add the mask to ensure we go over the + // next alignment value and then align down to the next valid multiple. + // By adding the mask, we ensure that align_up on an already aligned + // value will not change the value. + Value *SrcForMask = Builder.CreateAdd(SrcAddr, Mask, "over_boundary"); + + // Invert the mask to only clear the lower bits. + Value *InvertedMask = Builder.CreateNot(Mask, "inverted_mask"); + Value *Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); + + Result->setName("aligned_intptr"); + Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff"); + // The result must point to the same underlying allocation. This means we + // can use an inbounds GEP to enable better optimization. + + PointerType *DestType = Builder.getInt8PtrTy(); + if (unsigned AddrSpace = SrcType->getAddressSpace()) + DestType = Type::getInt8PtrTy(Builder.getContext(), AddrSpace); + + Value *Base = Src; + if (SrcType != DestType) + Base = Builder.CreateBitCast(Src, DestType); + + // Out-of-bound case could not happen. 
+ Result = Builder.CreateGEP(Base, Difference, "aligned_result"); + Result = Builder.CreatePointerCast(Result, SrcType); + + Type *IntPtrTy = Builder.getIntPtrTy(DL); + if (Alignment->getType() != IntPtrTy) + Alignment = + Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align"); + (void)Builder.CreateAlignmentAssumption(DL, Result, Alignment); + assert(Result->getType() == SrcType); + return Result; +} + // Build a struct that will keep state for an active coroutine. // struct f.frame { // ResumeFnTy ResumeFnAddr; @@ -764,21 +818,60 @@ // Because multiple allocas may own the same field slot, // we add allocas to field here. B.addFieldForAllocas(F, FrameData, Shape); - // Add PromiseAlloca to Allocas list so that - // 1. updateLayoutIndex could update its index after - // `performOptimizedStructLayout` - // 2. it is processed in insertSpills. - if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) - // We assume that the promise alloca won't be modified before - // CoroBegin and no alias will be create before CoroBegin. - FrameData.Allocas.emplace_back( - PromiseAlloca, DenseMap>{}, false); + // Create an entry for every spilled value. for (auto &S : FrameData.Spills) { FieldIDType Id = B.addField(S.first->getType(), None); FrameData.setFieldIndex(S.first, Id); } + Optional FramePtrField = None; + if (Shape.ABI == coro::ABI::Switch) { + // Add PromiseAlloca to Allocas list so that + // 1. updateLayoutIndex could update its index after + // `performOptimizedStructLayout` + // 2. it is processed in insertSpills. + if (PromiseAlloca) + // We assume that the promise alloca won't be modified before + // CoroBegin and no alias will be create before CoroBegin. + FrameData.Allocas.emplace_back( + PromiseAlloca, DenseMap>{}, + false); + + Align FrameAlign = + std::max_element( + B.getFields().begin(), B.getFields().end(), + [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; }) + ->Alignment; + + // Check for over-alignment. 
+ if (!Shape.CoroSizeAligneds.empty() && + FrameAlign > Shape.getSwitchCoroId()->getAlignment()) { + BasicBlock &Entry = F.getEntryBlock(); + IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt()); + + // Save raw frame pointer to alloca + Value *Mem = Shape.CoroBegin->getMem(); + AllocaInst *FramePtrAddr = + Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr"); + Builder.SetInsertPoint(Shape.CoroBegin); + Value *MockMem = Builder.CreatePointerCast(FramePtrAddr, Mem->getType()); + Builder.CreateStore(MockMem, FramePtrAddr); + + // Adjust frame pointer value. + Value *NewMem = emitAlignUpTo(Builder, MockMem, FrameAlign.value()); + Mem->replaceAllUsesWith(NewMem); + MockMem->replaceAllUsesWith(Mem); + cast(MockMem)->eraseFromParent(); + + // Add alloca to frame. + FramePtrField = B.addFieldForAlloca(FramePtrAddr); + FrameData.setFieldIndex(FramePtrAddr, *FramePtrField); + FrameData.Allocas.emplace_back( + FramePtrAddr, DenseMap>{}, true); + } + } + B.finish(FrameTy); FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); @@ -790,6 +883,10 @@ Shape.SwitchLowering.IndexField = B.getLayoutFieldIndex(*SwitchIndexFieldId); + if (FramePtrField) + Shape.SwitchLowering.FramePtrField = + B.getLayoutFieldIndex(*FramePtrField); + // Also round the frame size up to a multiple of its alignment, as is // generally expected in C/C++. Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -121,6 +121,10 @@ : cast(Arg->stripPointerCasts()); } + unsigned getAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + void clearPromise() { Value *Arg = getArgOperand(PromiseArg); setArgOperand(PromiseArg, @@ -599,6 +603,18 @@ } }; +/// This represents the llvm.coro.size.aligned instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroSizeAlignedInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_size_aligned; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -44,10 +44,11 @@ #define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger" namespace coro { +struct Shape; bool declaresIntrinsics(const Module &M, const std::initializer_list); -void replaceCoroFree(CoroIdInst *CoroId, bool Elide); +void replaceCoroFree(CoroIdInst *CoroId, bool Elide, Shape *Shape = nullptr); void updateCallGraph(Function &Caller, ArrayRef Funcs, CallGraph &CG, CallGraphSCC &SCC); /// Recover a dbg.declare prepared by the frontend and emit an alloca @@ -99,6 +100,7 @@ CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; + SmallVector CoroSizeAligneds; SmallVector CoroSuspends; SmallVector SwiftErrorOps; @@ -132,6 +134,7 @@ AllocaInst *PromiseAlloca; BasicBlock *ResumeEntryBlock; unsigned IndexField; + Optional FramePtrField; bool HasFinalSuspend; }; @@ -268,7 +271,6 @@ /// \param CG - if non-null, will be updated for the new call void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; - Shape() = default; explicit Shape(Function &F, bool ReuseFrameSlot = false) : ReuseFrameSlot(ReuseFrameSlot) { buildFrom(F); diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -962,7 +962,8 @@ // to suppress deallocation 
code. if (Shape.ABI == coro::ABI::Switch) coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]), - /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup); + /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup, + &Shape); } // Create a resume clone by cloning the body of the original function, setting @@ -1001,19 +1002,35 @@ if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); - if (Shape.CoroSizes.empty()) - return; + if (!Shape.CoroSizes.empty()) { + // In the same function all coro.sizes should have the same result type. + auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + + for (CoroSizeInst *CS : Shape.CoroSizes) { + CS->replaceAllUsesWith(SizeConstant); + CS->eraseFromParent(); + } + } + + if (!Shape.CoroSizeAligneds.empty()) { + auto *SizeIntrin = Shape.CoroSizeAligneds.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); - // In the same function all coro.sizes should have the same result type. - auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + uint64_t FrameAlign = Shape.FrameAlign.value(); + uint64_t NewAlign = Shape.getSwitchCoroId()->getAlignment(); + uint64_t Extra = FrameAlign > NewAlign ? 
FrameAlign - NewAlign : 0; + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size + Extra); - for (CoroSizeInst *CS : Shape.CoroSizes) { - CS->replaceAllUsesWith(SizeConstant); - CS->eraseFromParent(); + for (CoroSizeAlignedInst *CS : Shape.CoroSizeAligneds) { + CS->replaceAllUsesWith(SizeConstant); + CS->eraseFromParent(); + } } } @@ -1250,7 +1267,7 @@ switch (Shape.ABI) { case coro::ABI::Switch: { auto SwitchId = cast(CoroId); - coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); + coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr, &Shape); if (AllocInst) { IRBuilder<> Builder(AllocInst); auto *Frame = Builder.CreateAlloca(Shape.FrameTy); diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -171,7 +172,7 @@ // Replace all coro.frees associated with the provided CoroId either with 'null' // if Elide is true and with its frame parameter otherwise. -void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) { +void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide, Shape *Shape) { SmallVector CoroFrees; for (User *U : CoroId->users()) if (auto CF = dyn_cast(U)) @@ -180,9 +181,25 @@ if (CoroFrees.empty()) return; - Value *Replacement = - Elide ? ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext())) - : CoroFrees.front()->getFrame(); + LLVMContext &Ctx = CoroId->getContext(); + PointerType *Int8PtrTy = Type::getInt8PtrTy(Ctx); + Value *Replacement = Elide ? 
ConstantPointerNull::get(Int8PtrTy) + : CoroFrees.front()->getFrame(); + + if (!Elide && Shape && Shape->SwitchLowering.FramePtrField) { + unsigned FramePtrField = *Shape->SwitchLowering.FramePtrField; + for (CoroFreeInst *CF : CoroFrees) { + IRBuilder<> Builder(CF); + Value *FramePtr = + Builder.CreateBitCast(Replacement, Shape->FrameTy->getPointerTo()); + Value *GEP = Builder.CreateConstGEP2_32(Shape->FrameTy, FramePtr, 0, + FramePtrField); + Value *LI = Builder.CreateLoad(Int8PtrTy, GEP, "raw.frame.ptr"); + CF->replaceAllUsesWith(LI); + CF->eraseFromParent(); + } + return; + } for (CoroFreeInst *CF : CoroFrees) { CF->replaceAllUsesWith(Replacement); @@ -268,6 +285,9 @@ case Intrinsic::coro_size: CoroSizes.push_back(cast(II)); break; + case Intrinsic::coro_size_aligned: + CoroSizeAligneds.push_back(cast(II)); + break; case Intrinsic::coro_frame: CoroFrames.push_back(cast(II)); break; @@ -375,6 +395,7 @@ this->SwitchLowering.ResumeSwitch = nullptr; this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); this->SwitchLowering.ResumeEntryBlock = nullptr; + this->SwitchLowering.FramePtrField = None; for (auto AnySuspend : CoroSuspends) { auto Suspend = dyn_cast(AnySuspend); diff --git a/llvm/test/Transforms/Coroutines/coro-overalign.ll b/llvm/test/Transforms/Coroutines/coro-overalign.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-overalign.ll @@ -0,0 +1,81 @@ +; Check that we will emit extra code to handle overaligned frame. 
+; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +%PackedStruct = type <{ i64 }> + +declare void @consume(%PackedStruct*) + +define i8* @f() "coroutine.presplit"="1" { +entry: + %data = alloca %PackedStruct, align 32 + %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.aligned.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @consume(%PackedStruct* %data) + %0 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %0, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @consume(%PackedStruct* %data) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; See if the frame pointer was inserted. +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i8*, i1, [7 x i8], %PackedStruct } + +; See if we over-allocate, adjust frame ptr start address and use an alloca to +; save the raw frame pointer. +; CHECK-LABEL: @f( +;CHECK: %alloc.frame.ptr = alloca i8*, align 8 +;CHECK: %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*)) +;CHECK: %alloc = call i8* @malloc(i32 56) +;CHECK: store i8* %alloc, i8** %alloc.frame.ptr, align 8 +;CHECK: %intptr = ptrtoint i8* %alloc to i64 +;CHECK: %over_boundary = add i64 %intptr, 31 +;CHECK: %aligned_intptr = and i64 %over_boundary, -32 +;CHECK: %diff = sub i64 %aligned_intptr, %intptr +;CHECK: %aligned_result = getelementptr i8, i8* %alloc, i64 %diff +;CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %aligned_result, i64 32) ] +;CHECK: %hdl = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %aligned_result) + +; See if we emit correct deallocation code. 
+ +; CHECK-LABEL: @f.resume( +; CHECK: %0 = getelementptr %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK-NEXT: %raw.frame.ptr = load i8*, i8** %0, align 8 +; CHECK-NEXT: call void @free(i8* %raw.frame.ptr) +; CHECK-NEXT: ret void + +; CHECK-LABEL: @f.destroy( +; CHECK: %0 = getelementptr %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK-NEXT: %raw.frame.ptr = load i8*, i8** %0, align 8 +; CHECK-NEXT: call void @free(i8* %raw.frame.ptr) +; CHECK-NEXT: ret void + +; CHECK-LABEL: @f.cleanup( +; CHECK: call void @free(i8* null) +; CHECK-NEXT: ret void + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.aligned.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*)