diff --git a/clang/include/clang/AST/StmtCXX.h b/clang/include/clang/AST/StmtCXX.h --- a/clang/include/clang/AST/StmtCXX.h +++ b/clang/include/clang/AST/StmtCXX.h @@ -318,17 +318,19 @@ : public Stmt, private llvm::TrailingObjects { enum SubStmt { - Body, ///< The body of the coroutine. - Promise, ///< The promise statement. - InitSuspend, ///< The initial suspend statement, run before the body. - FinalSuspend, ///< The final suspend statement, run after the body. - OnException, ///< Handler for exceptions thrown in the body. - OnFallthrough, ///< Handler for control flow falling off the body. - Allocate, ///< Coroutine frame memory allocation. - Deallocate, ///< Coroutine frame memory deallocation. - ReturnValue, ///< Return value for thunk function: p.get_return_object(). - ResultDecl, ///< Declaration holding the result of get_return_object. - ReturnStmt, ///< Return statement for the thunk function. + Body, ///< The body of the coroutine. + Promise, ///< The promise statement. + InitSuspend, ///< The initial suspend statement, run before the body. + FinalSuspend, ///< The final suspend statement, run after the body. + OnException, ///< Handler for exceptions thrown in the body. + OnFallthrough, ///< Handler for control flow falling off the body. + Allocate, ///< Coroutine frame memory allocation. + Deallocate, ///< Coroutine frame memory deallocation. + AlignedAllocate, ///< Coroutine frame memory aligned allocation. + AlignedDeallocate, ///< Coroutine frame memory aligned deallocation. + ReturnValue, ///< Return value for thunk function: p.get_return_object(). + ResultDecl, ///< Declaration holding the result of get_return_object. + ReturnStmt, ///< Return statement for the thunk function. ReturnStmtOnAllocFailure, ///< Return statement if allocation failed. FirstParamMove ///< First offset for move construction of parameter copies. 
}; @@ -353,6 +355,8 @@ Stmt *OnFallthrough = nullptr; Expr *Allocate = nullptr; Expr *Deallocate = nullptr; + Expr *AlignedAllocate = nullptr; + Expr *AlignedDeallocate = nullptr; Expr *ReturnValue = nullptr; Stmt *ResultDecl = nullptr; Stmt *ReturnStmt = nullptr; @@ -406,6 +410,12 @@ Expr *getDeallocate() const { return cast_or_null<Expr>(getStoredStmts()[SubStmt::Deallocate]); } + Expr *getAlignedAllocate() const { + return cast_or_null<Expr>(getStoredStmts()[SubStmt::AlignedAllocate]); + } + Expr *getAlignedDeallocate() const { + return cast_or_null<Expr>(getStoredStmts()[SubStmt::AlignedDeallocate]); + } Expr *getReturnValueInit() const { return cast<Expr>(getStoredStmts()[SubStmt::ReturnValue]); } diff --git a/clang/lib/AST/StmtCXX.cpp b/clang/lib/AST/StmtCXX.cpp --- a/clang/lib/AST/StmtCXX.cpp +++ b/clang/lib/AST/StmtCXX.cpp @@ -117,6 +117,8 @@ SubStmts[CoroutineBodyStmt::OnFallthrough] = Args.OnFallthrough; SubStmts[CoroutineBodyStmt::Allocate] = Args.Allocate; SubStmts[CoroutineBodyStmt::Deallocate] = Args.Deallocate; + SubStmts[CoroutineBodyStmt::AlignedAllocate] = Args.AlignedAllocate; + SubStmts[CoroutineBodyStmt::AlignedDeallocate] = Args.AlignedDeallocate; SubStmts[CoroutineBodyStmt::ReturnValue] = Args.ReturnValue; SubStmts[CoroutineBodyStmt::ResultDecl] = Args.ResultDecl; SubStmts[CoroutineBodyStmt::ReturnStmt] = Args.ReturnStmt; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16944,6 +16944,23 @@ auto *One = llvm::ConstantInt::get(IntType, 1); Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); } + + BuiltinAlignArgs(llvm::Value *SrcV, llvm::Value *Align, + CodeGenFunction &CGF) { + Src = SrcV; + SrcType = Src->getType(); + if (SrcType->isPointerTy()) { + IntType = IntegerType::get( + CGF.getLLVMContext(), + CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); + } else { + assert(SrcType->isIntegerTy()); + IntType = cast<llvm::IntegerType>(SrcType); + } + Alignment = Align; + 
auto *One = llvm::ConstantInt::get(IntType, 1); + Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); + } }; } // namespace @@ -16959,12 +16976,10 @@ llvm::Constant::getNullValue(Args.IntType), "is_aligned")); } -/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. -/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the -/// llvm.ptrmask instrinsic (with a GEP before in the align_up case). -/// TODO: actually use ptrmask once most optimization passes know about it. -RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { - BuiltinAlignArgs Args(E, *this); +llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(void *ArgsPtr, const Expr *E, + bool AlignUp) { + assert(ArgsPtr); + const BuiltinAlignArgs &Args = *static_cast<BuiltinAlignArgs *>(ArgsPtr); llvm::Value *SrcAddr = Args.Src; if (Args.Src->getType()->isPointerTy()) SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr"); @@ -17003,7 +17018,23 @@ emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); } assert(Result->getType() == Args.SrcType); - return RValue::get(Result); + return Result; +} + +/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. +/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the +/// llvm.ptrmask intrinsic (with a GEP before in the align_up case). +/// TODO: actually use ptrmask once most optimization passes know about it. 
+RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { + BuiltinAlignArgs Args(E, *this); + return RValue::get(EmitBuiltinAlignTo(&Args, E, AlignUp)); +} + +llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(llvm::Value *Src, + llvm::Value *Align, + const Expr *E, bool AlignUp) { + BuiltinAlignArgs Args(Src, Align, *this); + return EmitBuiltinAlignTo(&Args, E, AlignUp); } Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -12,9 +12,12 @@ #include "CGCleanup.h" #include "CodeGenFunction.h" -#include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IntrinsicInst.h" +#include using namespace clang; using namespace CodeGen; @@ -75,6 +78,7 @@ // Stores the last emitted coro.free for the deallocate expressions, we use it // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem). llvm::CallInst *LastCoroFree = nullptr; + bool LastCoroFreeUsedForDealloc = false; // If coro.id came from the builtin, remember the expression to give better // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by @@ -412,12 +416,94 @@ } } }; + +void overAllocateFrame(CodeGenFunction &CGF, llvm::CallInst *CI, bool IsAlloc) { + unsigned CoroSizeIdx = IsAlloc ? 
0 : 1; + CGBuilderTy &Builder = CGF.Builder; + auto OrigIP = Builder.saveIP(); + Builder.SetInsertPoint(CI); + llvm::Function *CoroAlign = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy); + const auto &TI = CGF.CGM.getContext().getTargetInfo(); + unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth(); + auto *AlignCall = Builder.CreateCall(CoroAlign); + auto *AlignOfNewInt = llvm::ConstantInt::get(CGF.SizeTy, AlignOfNew, true); + auto *Diff = Builder.CreateNSWSub(AlignCall, AlignOfNewInt); + auto *NewCoroSize = Builder.CreateAdd(CI->getArgOperand(CoroSizeIdx), Diff); + CI->setArgOperand(CoroSizeIdx, NewCoroSize); + Builder.restoreIP(OrigIP); +} + +bool hasAlignArg(llvm::CallInst *MemCall) { + return llvm::any_of(MemCall->args(), [](llvm::Use &Arg) { + auto *FrameAlign = dyn_cast<llvm::IntrinsicInst>(&Arg); + return FrameAlign && + FrameAlign->getIntrinsicID() == llvm::Intrinsic::coro_align; + }); +} + +void emitDynamicAlignedDealloc(CodeGenFunction &CGF, + llvm::BasicBlock *AlignedFreeBB, + llvm::CallInst *CoroFree) { + llvm::CallInst *Dealloc = nullptr; + for (llvm::User *U : CoroFree->users()) { + if (auto *CI = dyn_cast<llvm::CallInst>(U)) + if (CI->getParent() == CGF.Builder.GetInsertBlock()) + Dealloc = CI; + } + assert(Dealloc); + + CGF.Builder.SetInsertPoint(AlignedFreeBB->getFirstNonPHI()); + + // Replace `coro.free` argument with the address from coroutine frame. + + llvm::Function *RawFramePtrOffsetIntrin = CGF.CGM.getIntrinsic( + llvm::Intrinsic::coro_raw_frame_ptr_offset, CGF.Int32Ty); + auto *RawFramePtrOffset = CGF.Builder.CreateCall(RawFramePtrOffsetIntrin); + auto *FramePtrAddrStart = + CGF.Builder.CreateInBoundsGEP(CoroFree, {RawFramePtrOffset}); + auto *FramePtrAddr = CGF.Builder.CreatePointerCast( + FramePtrAddrStart, CGF.Int8PtrTy->getPointerTo()); + auto *FramePtr = + CGF.Builder.CreateLoad({FramePtrAddr, CGF.getPointerAlign()}); + Dealloc->setArgOperand(0, FramePtr); + + // Match size_t argument with the one used during allocation. 
+ assert(Dealloc->getNumArgOperands() >= 1); + if (Dealloc->getNumArgOperands() > 1) { + // Size may only be the second argument of allocator call. + if (auto *CoroSize = + dyn_cast<llvm::IntrinsicInst>(Dealloc->getArgOperand(1))) + if (CoroSize->getIntrinsicID() == llvm::Intrinsic::coro_size) + overAllocateFrame(CGF, Dealloc, /*IsAlloc*/ false); + } + + CGF.Builder.SetInsertPoint(AlignedFreeBB); +} + +void emitCheckAlignBasicBlock(CodeGenFunction &CGF, + llvm::BasicBlock *CheckAlignBB, + llvm::BasicBlock *AlignBB, + llvm::BasicBlock *NonAlignBB) { + CGF.EmitBlock(CheckAlignBB); + + auto &Builder = CGF.Builder; + auto &TI = CGF.CGM.getContext().getTargetInfo(); + unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *CoroAlign = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy)); + auto *AlignOfNew = llvm::ConstantInt::get(CGF.SizeTy, NewAlign); + auto *Cmp = + Builder.CreateICmp(llvm::CmpInst::ICMP_UGT, CoroAlign, AlignOfNew); + Builder.CreateCondBr(Cmp, AlignBB, NonAlignBB); } -namespace { // Make sure to call coro.delete on scope exit. struct CallCoroDelete final : public EHScopeStack::Cleanup { Stmt *Deallocate; + Stmt *AlignedDeallocate; + bool DynamicAlignedDealloc; // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;" @@ -432,21 +518,34 @@ // call. BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock(); + auto *CheckAlignBB = CGF.createBasicBlock("coro.free.check.align"); + auto *AlignedFreeBB = CGF.createBasicBlock("coro.free.align"); auto *FreeBB = CGF.createBasicBlock("coro.free"); + auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); + + emitCheckAlignBasicBlock(CGF, CheckAlignBB, AlignedFreeBB, FreeBB); + CGF.EmitBlock(FreeBB); CGF.EmitStmt(Deallocate); - - auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); - CGF.EmitBlock(AfterFreeBB); + CGF.Builder.CreateBr(AfterFreeBB); // We should have captured coro.free from the emission of deallocate. 
auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; if (!CoroFree) { CGF.CGM.Error(Deallocate->getBeginLoc(), "Deallocation expressoin does not refer to coro.free"); return; } + CGF.CurCoro.Data->LastCoroFreeUsedForDealloc = true; + CGF.EmitBlock(AlignedFreeBB); + CGF.EmitStmt(AlignedDeallocate); + CGF.CurCoro.Data->LastCoroFreeUsedForDealloc = false; + if (DynamicAlignedDealloc) + emitDynamicAlignedDealloc(CGF, AlignedFreeBB, CoroFree); + + CGF.EmitBlock(AfterFreeBB); + // Get back to the block we were originally and move coro.free there. auto *InsertPt = SaveInsertBlock->getTerminator(); CoroFree->moveBefore(InsertPt); @@ -455,15 +554,18 @@ // Add if (auto *mem = coro.free) Deallocate; auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr); - CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB); + CGF.Builder.CreateCondBr(Cond, CheckAlignBB, AfterFreeBB); // No longer need old terminator. InsertPt->eraseFromParent(); CGF.Builder.SetInsertPoint(AfterFreeBB); } - explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {} + explicit CallCoroDelete(Stmt *DeallocStmt, Stmt *AlignedDeallocStmt, + bool DynamicAlignedDealloc) + : Deallocate(DeallocStmt), AlignedDeallocate(AlignedDeallocStmt), + DynamicAlignedDealloc(DynamicAlignedDealloc) {} }; -} +} // namespace namespace { struct GetReturnObjectManager { @@ -547,9 +649,13 @@ auto *EntryBB = Builder.GetInsertBlock(); auto *AllocBB = createBasicBlock("coro.alloc"); + auto *AlignAllocBB = createBasicBlock("coro.alloc.align"); + auto *CheckAlignBB = createBasicBlock("coro.alloc.check.align"); auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); auto *RetBB = createBasicBlock("coro.ret"); + llvm::BasicBlock *RetOnFailureBB = nullptr; + llvm::BasicBlock *AlignAllocBB2 = nullptr; auto *CoroId = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_id), @@ -564,7 +670,9 @@ auto *CoroAlloc = 
Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId}); - Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB); + Builder.CreateCondBr(CoroAlloc, CheckAlignBB, InitBB); + + emitCheckAlignBasicBlock(*this, CheckAlignBB, AlignAllocBB, AllocBB); EmitBlock(AllocBB); auto *AllocateCall = EmitScalarExpr(S.getAllocate()); @@ -572,10 +680,9 @@ // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { - auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); + RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); // See if allocation was successful. - auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); @@ -587,12 +694,48 @@ Builder.CreateBr(InitBB); } + EmitBlock(AlignAllocBB); + + auto *AlignedAllocateCall = EmitScalarExpr(S.getAlignedAllocate()); + bool HasAlignArg = hasAlignArg(cast<llvm::CallInst>(AlignedAllocateCall)); + + if (!HasAlignArg) + overAllocateFrame(*this, cast<llvm::CallInst>(AlignedAllocateCall), + /*IsAlloc*/ true); + + if (S.getReturnStmtOnAllocFailure()) { + auto *Cond = Builder.CreateICmpNE(AlignedAllocateCall, NullPtr); + if (HasAlignArg) { + Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); + } else { + AlignAllocBB2 = createBasicBlock("coro.alloc.align2"); + Builder.CreateCondBr(Cond, AlignAllocBB2, RetOnFailureBB); + EmitBlock(AlignAllocBB2); + } + } + + if (!HasAlignArg) { + auto *CoroAlign = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_align, SizeTy)); + auto *AlignedUpAddr = EmitBuiltinAlignTo(AlignedAllocateCall, CoroAlign, + S.getAlignedAllocate(), true); + auto *RawFramePtrAddrIntrin = + CGM.getIntrinsic(llvm::Intrinsic::coro_raw_frame_ptr_addr); + auto *RawFramePtrAddr = Builder.CreateCall(RawFramePtrAddrIntrin); + Builder.CreateStore(AlignedAllocateCall, + {RawFramePtrAddr, getPointerAlign()}); + 
AlignedAllocateCall = AlignedUpAddr; + } + // Record the real predecessor of coro.init; it need not be AlignAllocBB. + auto *AlignedAllocOrInvokeContBB = Builder.GetInsertBlock(); EmitBlock(InitBB); // Pass the result of the allocation to coro.begin. - auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + auto *Phi = Builder.CreatePHI(VoidPtrTy, 3); Phi->addIncoming(NullPtr, EntryBB); Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + Phi->addIncoming(AlignedAllocateCall, AlignedAllocOrInvokeContBB); + auto *CoroBegin = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); CurCoro.Data->CoroBegin = CoroBegin; @@ -605,7 +748,8 @@ CGDebugInfo *DI = getDebugInfo(); ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap); CodeGenFunction::RunCleanupsScope ResumeScope(*this); - EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate()); + EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate(), + S.getAlignedDeallocate(), !HasAlignArg); // Create mapping between parameters and copy-params for coroutine function. auto ParamMoves = S.getParamMoves(); @@ -729,6 +873,10 @@ case llvm::Intrinsic::coro_alloc: case llvm::Intrinsic::coro_begin: case llvm::Intrinsic::coro_free: { + // Make deallocation and aligned deallocation share one `coro.free`. + if (CurCoro.Data && CurCoro.Data->LastCoroFreeUsedForDealloc) + return RValue::get(CurCoro.Data->LastCoroFree); + if (CurCoro.Data && CurCoro.Data->CoroId) { Args.push_back(CurCoro.Data->CoroId); break; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1917,6 +1917,8 @@ void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); + llvm::Value *EmitBuiltinAlignTo(void *Args, const Expr *E, bool AlignUp); + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -4060,6 +4062,8 @@ RValue EmitBuiltinIsAligned(const CallExpr *E); /// Emit IR for __builtin_align_up/__builtin_align_down. 
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp); + llvm::Value *EmitBuiltinAlignTo(llvm::Value *Src, llvm::Value *Align, + const Expr *E, bool AlignUp); llvm::Function *generateBuiltinOSLogHelperFunction( const analyze_os_log::OSLogBufferLayout &Layout, diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1423,7 +1423,9 @@ return false; this->Allocate = NewExpr.get(); + this->AlignedAllocate = this->Allocate; this->Deallocate = DeleteExpr.get(); + this->AlignedDeallocate = this->Deallocate; return true; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -7817,6 +7817,21 @@ return StmtError(); Builder.Deallocate = DeallocRes.get(); + assert(S->getAlignedAllocate() && S->getAlignedDeallocate() && + "aligned allocation and deallocation calls must already be built"); + // Transform the aligned calls themselves: Sema aliases them to the plain + // calls today, but they are stored as separate sub-statements. + ExprResult AlignedAllocRes = getDerived().TransformExpr(S->getAlignedAllocate()); + if (AlignedAllocRes.isInvalid()) + return StmtError(); + Builder.AlignedAllocate = AlignedAllocRes.get(); + + ExprResult AlignedDeallocRes = + getDerived().TransformExpr(S->getAlignedDeallocate()); + if (AlignedDeallocRes.isInvalid()) + return StmtError(); + Builder.AlignedDeallocate = AlignedDeallocRes.get(); + assert(S->getResultDecl() && "ResultDecl must already be built"); StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl()); if (ResultDecl.isInvalid()) diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -57,24 +57,55 @@ extern "C" void f0(global_new_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[NeedAlloc:.+]] = call i1 @llvm.coro.alloc(token %[[ID]]) // CHECK: br i1 %[[NeedAlloc]], label 
%[[AllocBB:.+]], label %[[InitBB:.+]] + // CHECK: br i1 %[[NeedAlloc]], label %[[CheckAlignBB:.+]], label %[[InitBB:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[AllocBB:.+]] // CHECK: [[AllocBB]]: + // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK-NEXT: br label %[[InitBB:.+]] + + // CHECK: [[AlignAllocBB]]: // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[PAD:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[PAD]] + // CHECK: %[[MEM2:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) + // CHECK: %[[ALIGN2:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM2]], + // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN2]]) ] + // CHECK: %[[ADDR:.+]] = call i8** @llvm.coro.raw.frame.ptr.addr() + // CHECK: store i8* %[[MEM2]], i8** %[[ADDR]], align 8 // CHECK: br label %[[InitBB]] // CHECK: [[InitBB]]: - // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ] + // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %[[MEM]], %[[AllocBB]] ], [ %[[ALIGNED]], %[[AlignAllocBB]] ] // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]]) // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: %[[NeedDealloc:.+]] = icmp ne i8* %[[MEM]], null - // CHECK: br i1 %[[NeedDealloc]], label %[[FreeBB:.+]], label %[[Afterwards:.+]] + // CHECK: br i1 %[[NeedDealloc]], label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: 
%[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]] // CHECK: [[FreeBB]]: - // CHECK: call void @_ZdlPv(i8* %[[MEM]]) - // CHECK: br label %[[Afterwards]] + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] + + // CHECK: [[AlignedFreeBB]]: + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] // CHECK: [[Afterwards]]: // CHECK: ret void @@ -157,6 +188,7 @@ // CHECK-LABEL: f1b( extern "C" void f1b(promise_matching_global_placement_new_tag, dummy *) { // CHECK: call noalias nonnull i8* @_Znwm(i64 + // CHECK-NOT: call noalias nonnull i8* @_ZnwmSt11align_val_t(i64 co_return; } @@ -182,6 +214,7 @@ // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]]) + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* co_return; } @@ -229,16 +262,41 @@ // CHECK: %[[RetVal:.+]] = alloca i32 // CHECK: %[[Gro:.+]] = alloca i32 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[OKBB:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[AllocBB:.+]] + + // CHECK: [[AllocBB]]: // CHECK: %[[SIZE:.+]] = call i64 
@llvm.coro.size.i64() // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null - // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] + // CHECK: br i1 %[[OK]], label %[[OKBB]], label %[[ERRBB:.+]] // CHECK: [[ERRBB]]: // CHECK: %[[FailRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type39get_return_object_on_allocation_failureEv( // CHECK: store i32 %[[FailRet]], i32* %[[RetVal]] // CHECK: br label %[[RetBB:.+]] + // CHECK: [[AlignAllocBB]]: + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[PAD:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[PAD]] + // CHECK: %[[MEM2:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) + // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM2]], null + // CHECK: br i1 %[[OK]], label %[[AlignAllocBB2:.+]], label %[[ERRBB:.+]] + + // CHECK: [[AlignAllocBB2]]: + // CHECK: %[[ALIGN2:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM2]], + // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN2]]) ] + // CHECK: %[[ADDR:.+]] = call i8** @llvm.coro.raw.frame.ptr.addr() + // CHECK: store i8* %[[MEM2]], i8** %[[ADDR]], align 8 + // CHECK: br label %[[OKBB]] + // CHECK: [[OKBB]]: // CHECK: %[[OkRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv( // CHECK: store i32 %[[OkRet]], i32* %[[Gro]] diff --git a/clang/test/CodeGenCoroutines/coro-cleanup.cpp b/clang/test/CodeGenCoroutines/coro-cleanup.cpp --- a/clang/test/CodeGenCoroutines/coro-cleanup.cpp +++ 
b/clang/test/CodeGenCoroutines/coro-cleanup.cpp @@ -78,12 +78,46 @@ // CHECK: [[Cleanup]]: // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev( - // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem0]] + // CHECK: %[[MEM0:.+]] = call i8* @llvm.coro.free( + // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], + // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]] + + // CHECK: [[FreeBB]]: + // CHECK: call void @_ZdlPv(i8* %[[MEM0]] + // CHECK: br label %[[Afterwards]] + + // CHECK: [[AlignedFreeBB]]: + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM0]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] // CHECK: [[Dealloc]]: - // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + // CHECK: %[[MEM0:.+]] = call i8* @llvm.coro.free( + // CHECK: br i1 %{{.*}}, label %[[CheckAlignBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], + // CHECK: br i1 %[[CMP]], label %[[AlignedFreeBB:.+]], label %[[FreeBB:.+]] + + // CHECK: [[FreeBB]]: + // CHECK: call void @_ZdlPv(i8* %[[MEM0]] + // CHECK: br label %[[Afterwards]] + + // CHECK: [[AlignedFreeBB]]: + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM0]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to 
i8** + // CHECK-NEXT: %[[MEM:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK-NEXT: br label %[[Afterwards]] co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -68,6 +68,7 @@ // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev( // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + // CHECK: call void @_ZdlPv(i8* %{{.*}}) // Initialize retval from Gro and destroy Gro