diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16752,6 +16752,26 @@ auto *One = llvm::ConstantInt::get(IntType, 1); Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); } + + /// Build the arguments from already-emitted IR values instead of a CallExpr. + /// \p Align is zero-extended or truncated to IntType because the mask + /// subtraction below requires both operands to have the same integer type. + BuiltinAlignArgs(llvm::Value *SrcV, llvm::Value *Align, + CodeGenFunction &CGF) { + Src = SrcV; + SrcType = Src->getType(); + if (SrcType->isPointerTy()) { + IntType = IntegerType::get( + CGF.getLLVMContext(), + CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); + } else { + assert(SrcType->isIntegerTy()); + IntType = cast(SrcType); + } + Alignment = CGF.Builder.CreateZExtOrTrunc(Align, IntType, "alignment"); + auto *One = llvm::ConstantInt::get(IntType, 1); + Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); + } }; } // namespace @@ -16767,12 +16787,10 @@ llvm::Constant::getNullValue(Args.IntType), "is_aligned")); } -/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. -/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the -/// llvm.ptrmask instrinsic (with a GEP before in the align_up case). -/// TODO: actually use ptrmask once most optimization passes know about it. -RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { - BuiltinAlignArgs Args(E, *this); +llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(void *ArgsPtr, const Expr *E, + bool AlignUp) { + assert(ArgsPtr); + const BuiltinAlignArgs &Args = *static_cast(ArgsPtr); llvm::Value *SrcAddr = Args.Src; if (Args.Src->getType()->isPointerTy()) SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr"); @@ -16811,7 +16829,23 @@ emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); } assert(Result->getType() == Args.SrcType); - return RValue::get(Result); + return Result; +} + +/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. 
+/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the +/// llvm.ptrmask intrinsic (with a GEP before in the align_up case). +/// TODO: actually use ptrmask once most optimization passes know about it. +RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { + BuiltinAlignArgs Args(E, *this); + return RValue::get(EmitBuiltinAlignTo(&Args, E, AlignUp)); +} + +llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(llvm::Value *Src, + llvm::Value *Align, + const Expr *E, bool AlignUp) { + BuiltinAlignArgs Args(Src, Align, *this); + return EmitBuiltinAlignTo(&Args, E, AlignUp); } Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -15,6 +15,9 @@ #include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtVisitor.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IntrinsicInst.h" +#include using namespace clang; using namespace CodeGen; @@ -415,6 +418,73 @@ } namespace { + +/// Grow the size argument of \p CI by max(coro.align - AlignOfNew, 0), where +/// AlignOfNew is the target's getNewAlign() / getCharWidth(). The size is +/// argument 0 of an allocation (\p IsAlloc) and argument 1 of a deallocation. +void overAllocateFrame(CodeGenFunction &CGF, llvm::CallInst *CI, bool IsAlloc) { + unsigned CoroSizeIdx = IsAlloc ? 0 : 1; + CodeGenModule &CGM = CGF.CGM; + CGBuilderTy &Builder = CGF.Builder; + auto OrigIP = Builder.saveIP(); + Builder.SetInsertPoint(CI); + llvm::Function *CoroAlign = + CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy); + const auto &TI = CGM.getContext().getTargetInfo(); + unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth(); + auto *AlignCall = Builder.CreateCall(CoroAlign); + // int x = coro_align - AlignOfNew; + // coro_size + (x > 0 ? 
x : 0) + auto *AlignOfNewInt = llvm::ConstantInt::get(CGF.SizeTy, AlignOfNew, true); + auto *Diff = Builder.CreateNSWSub(AlignCall, AlignOfNewInt); + auto *Zero = llvm::ConstantInt::getSigned(CGF.SizeTy, 0); + auto *Cmp = Builder.CreateICmp(llvm::CmpInst::ICMP_SGT, Diff, Zero); + auto *Extra = Builder.CreateSelect(Cmp, Diff, Zero); + auto *NewCoroSize = Builder.CreateAdd(CI->getArgOperand(CoroSizeIdx), Extra); + CI->setArgOperand(CoroSizeIdx, NewCoroSize); + Builder.restoreIP(OrigIP); +} + +/// Patch the deallocation call that uses \p CoroFree: when coro.align exceeds +/// AlignOfNew it receives the pointer loaded from the frame at +/// coro.raw.frame.ptr.offset, otherwise \p CoroFree itself. A coro.size +/// argument, if present, is grown through overAllocateFrame. +void handleOverAlignedFrame(CodeGenFunction &CGF, llvm::CallInst *CoroFree) { + // If the frame is not overaligned, this sequence should be optimized out. + auto SaveIP = CGF.Builder.saveIP(); + CGF.Builder.SetInsertPoint(CoroFree->getParent()->getFirstNonPHIOrDbg()); + assert(CoroFree->getNumUses() == 1); + auto *Dealloc = cast(CoroFree->user_back()); + llvm::Function *CoroAlign = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_align, CGF.SizeTy); + auto *AlignCall = CGF.Builder.CreateCall(CoroAlign); + const auto &TI = CGF.CGM.getContext().getTargetInfo(); + auto *AlignOfNew = + llvm::ConstantInt::get(CGF.SizeTy, TI.getNewAlign() / TI.getCharWidth()); + auto *Cmp = + CGF.Builder.CreateICmp(llvm::CmpInst::ICMP_UGT, AlignCall, AlignOfNew); + llvm::Function *RawFramePtrOffsetIntrin = CGF.CGM.getIntrinsic( + llvm::Intrinsic::coro_raw_frame_ptr_offset, CGF.Int32Ty); + auto *RawFramePtrOffset = CGF.Builder.CreateCall(RawFramePtrOffsetIntrin); + auto *FramePtrAddrStart = + CGF.Builder.CreateInBoundsGEP(CoroFree, {RawFramePtrOffset}); + auto *FramePtrAddr = CGF.Builder.CreatePointerCast( + FramePtrAddrStart, CGF.Int8PtrTy->getPointerTo()); + auto *FramePtr = + CGF.Builder.CreateLoad({FramePtrAddr, CGF.getPointerAlign()}); + auto *MemPtr = CGF.Builder.CreateSelect(Cmp, FramePtr, CoroFree); + + assert(Dealloc->getNumArgOperands() >= 1); + Dealloc->setArgOperand(0, MemPtr); + if (Dealloc->getNumArgOperands() > 1) { + // Size may only be the second argument; it need not be an intrinsic call. 
+ auto *CoroSize = dyn_cast<llvm::IntrinsicInst>(Dealloc->getArgOperand(1)); + if (CoroSize && CoroSize->getIntrinsicID() == llvm::Intrinsic::coro_size) + overAllocateFrame(CGF, Dealloc, /*IsAlloc*/ false); + } + CGF.Builder.restoreIP(SaveIP); +} + // Make sure to call coro.delete on scope exit. struct CallCoroDelete final : public EHScopeStack::Cleanup { Stmt *Deallocate; @@ -436,9 +506,6 @@ CGF.EmitBlock(FreeBB); CGF.EmitStmt(Deallocate); - auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); - CGF.EmitBlock(AfterFreeBB); - // We should have captured coro.free from the emission of deallocate. auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; if (!CoroFree) { @@ -447,6 +514,11 @@ return; } + handleOverAlignedFrame(CGF, CoroFree); + + auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); + CGF.EmitBlock(AfterFreeBB); + // Get back to the block we were originally and move coro.free there. auto *InsertPt = SaveInsertBlock->getTerminator(); CoroFree->moveBefore(InsertPt); @@ -463,7 +535,7 @@ } explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {} }; -} +} // namespace namespace { struct GetReturnObjectManager { @@ -547,6 +619,8 @@ auto *EntryBB = Builder.GetInsertBlock(); auto *AllocBB = createBasicBlock("coro.alloc"); + auto *AlignAllocBB = createBasicBlock("coro.alloc.align"); + auto *CheckAlignBB = createBasicBlock("coro.check.align"); auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); auto *RetBB = createBasicBlock("coro.ret"); @@ -566,31 +640,55 @@ EmitBlock(AllocBB); auto *AllocateCall = EmitScalarExpr(S.getAllocate()); - auto *AllocOrInvokeContBB = Builder.GetInsertBlock(); + overAllocateFrame(*this, cast(AllocateCall), + /*IsAlloc*/ true); // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); // See if allocation was successful. 
- auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); - Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); + Builder.CreateCondBr(Cond, CheckAlignBB, RetOnFailureBB); // If not, return OnAllocFailure object. EmitBlock(RetOnFailureBB); EmitStmt(RetOnAllocFailure); } else { - Builder.CreateBr(InitBB); + Builder.CreateBr(CheckAlignBB); } + EmitBlock(CheckAlignBB); + + auto *CoroAlign = + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::coro_align, SizeTy)); + auto *AlignOfNew = + llvm::ConstantInt::get(SizeTy, TI.getNewAlign() / TI.getCharWidth()); + auto *Cmp = + Builder.CreateICmp(llvm::CmpInst::ICMP_UGT, CoroAlign, AlignOfNew); + Builder.CreateCondBr(Cmp, AlignAllocBB, InitBB); + + EmitBlock(AlignAllocBB); + + auto *AllocateCallAlign = + EmitBuiltinAlignTo(AllocateCall, CoroAlign, S.getAllocate(), true); + llvm::Function *RawFramePtrOffsetIntrin = + CGM.getIntrinsic(llvm::Intrinsic::coro_raw_frame_ptr_offset, Int32Ty); + auto *RawFramePtrOffset = Builder.CreateCall(RawFramePtrOffsetIntrin); + auto *FramePtrAddrStart = + Builder.CreateInBoundsGEP(AllocateCallAlign, {RawFramePtrOffset}); + auto *FramePtrAddr = + Builder.CreatePointerCast(FramePtrAddrStart, Int8PtrTy->getPointerTo()); + Builder.CreateStore(AllocateCall, {FramePtrAddr, getPointerAlign()}); + EmitBlock(InitBB); // Pass the result of the allocation to coro.begin. 
- auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + auto *Phi = Builder.CreatePHI(VoidPtrTy, 3); Phi->addIncoming(NullPtr, EntryBB); - Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + Phi->addIncoming(AllocateCall, CheckAlignBB); + Phi->addIncoming(AllocateCallAlign, AlignAllocBB); auto *CoroBegin = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); CurCoro.Data->CoroBegin = CoroBegin; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1885,6 +1885,8 @@ void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); + llvm::Value *EmitBuiltinAlignTo(void *Args, const Expr *E, bool AlignUp); + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -4013,6 +4015,8 @@ RValue EmitBuiltinIsAligned(const CallExpr *E); /// Emit IR for __builtin_align_up/__builtin_align_down. RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp); + llvm::Value *EmitBuiltinAlignTo(llvm::Value *Src, llvm::Value *Align, + const Expr *E, bool AlignUp); llvm::Function *generateBuiltinOSLogHelperFunction( const analyze_os_log::OSLogBufferLayout &Layout, diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -60,12 +60,31 @@ // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]] // CHECK: [[AllocBB]]: - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK-NEXT: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK-NEXT: %[[CMP:.+]] = icmp sgt i64 %[[DIFF]], 0 + // CHECK-NEXT: %[[SEL:.+]] = select i1 
%[[CMP]], i64 %[[DIFF]], i64 0 + // CHECK-NEXT: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[SEL]] + // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) + // CHECK-NEXT: br label %[[CheckAlignBB:.+]] + + // CHECK: [[CheckAlignBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK: br i1 %[[CMP]], label %[[AlignAllocBB:.+]], label %[[InitBB]] + + // CHECK: [[AlignAllocBB]]: + // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM]] + // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN]]) ] + // CHECK: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[ALIGNED]], i32 %[[OFFSET]] + // CHECK: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK: store i8* %[[MEM]], i8** %[[ADDR2]], align 8 // CHECK: br label %[[InitBB]] // CHECK: [[InitBB]]: - // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ] + // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %[[MEM]], %[[CheckAlignBB]] ], [ %[[ALIGNED]], %[[AlignAllocBB]] ] // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]]) // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -73,8 +92,15 @@ // CHECK: br i1 %[[NeedDealloc]], label %[[FreeBB:.+]], label %[[Afterwards:.+]] // CHECK: [[FreeBB]]: - // CHECK: call void @_ZdlPv(i8* %[[MEM]]) - // CHECK: br label %[[Afterwards]] + // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK-NEXT: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], 16 + // CHECK-NEXT: %[[OFFSET:.+]] = call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK-NEXT: %[[ADDR:.+]] = getelementptr inbounds i8, i8* %[[MEM]], i32 %[[OFFSET]] + // CHECK-NEXT: %[[ADDR2:.+]] = bitcast i8* %[[ADDR]] to i8** + // CHECK-NEXT: %[[MEM2:.+]] = load i8*, i8** %[[ADDR2]], align 8 + // CHECK-NEXT: %[[MEM3:.+]] = select i1 
%[[CMP]], i8* %[[MEM2]], i8* %[[MEM]] + // CHECK-NEXT: call void @_ZdlPv(i8* %[[MEM3]]) + // CHECK-NEXT: br label %[[Afterwards]] // CHECK: [[Afterwards]]: // CHECK: ret void @@ -98,11 +124,12 @@ extern "C" void f1(promise_new_tag ) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( - // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) - // CHECK: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK: call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) + // CHECK: call void @_ZdlPv(i8* %{{.*}}) co_return; } @@ -130,7 +157,8 @@ // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4 // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4 // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8 - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[NEWSIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) co_return; } @@ -177,11 +205,15 @@ extern "C" void f2(promise_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: 
%[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]]) + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[CMP:.+]] = icmp ugt i64 %[[ALIGN]], + // CHECK: %[[MEM2:.+]] = select i1 %[[CMP]], i8* {{.*}}, i8* %[[MEM]] + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM2]]) co_return; } @@ -202,12 +234,16 @@ extern "C" void f3(promise_sized_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) - // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) + // CHECK: call i64 @llvm.coro.align.i64() + // CHECK: %[[MEM2:.+]] = select i1 {{.*}}, i8* {{.*}}, i8* %[[MEM]] + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[SIZE2:.+]] = add i64 %[[SIZE]], + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM2]], i64 %[[SIZE2]]) co_return; } @@ -230,7 +266,8 @@ // CHECK: %[[Gro:.+]] = alloca i32 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], + // CHECK: %[[MEM:.+]] = call noalias i8* 
@_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] diff --git a/clang/test/CodeGenCoroutines/coro-cleanup.cpp b/clang/test/CodeGenCoroutines/coro-cleanup.cpp --- a/clang/test/CodeGenCoroutines/coro-cleanup.cpp +++ b/clang/test/CodeGenCoroutines/coro-cleanup.cpp @@ -78,12 +78,18 @@ // CHECK: [[Cleanup]]: // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev( - // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem0]] + // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free( + // CHECK: call i64 @llvm.coro.align.i64() + // CHECK: call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK: %[[MEM2:.+]] = select i1 %{{.*}}, i8* %{{.*}}, i8* %[[MEM]] + // CHECK: call void @_ZdlPv(i8* %[[MEM2]]) // CHECK: [[Dealloc]]: - // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free( + // CHECK: call i64 @llvm.coro.align.i64() + // CHECK: call i32 @llvm.coro.raw.frame.ptr.offset.i32() + // CHECK: %[[MEM2:.+]] = select i1 %{{.*}}, i8* %{{.*}}, i8* %[[MEM]] + // CHECK: call void @_ZdlPv(i8* %[[MEM2]]) co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -49,7 +49,8 @@ // CHECK: %[[GroActive:.+]] = alloca i1 // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]]) + // CHECK: %[[NewSize:.+]] = add i64 %[[Size]], + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NewSize]]) // CHECK: store i1 false, i1* %[[GroActive]] // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev( // CHECK: call void 
@_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv( @@ -66,8 +67,8 @@ // Destroy promise and free the memory. // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev( - // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( - // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + // CHECK: call i8* @llvm.coro.free( + // CHECK: call void @_ZdlPv(i8* %{{.*}}) // Initialize retval from Gro and destroy Gro diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -948,6 +948,60 @@ The `coro.size` intrinsic is lowered to a constant representing the size of the coroutine frame. +.. _coro.align: + +'llvm.coro.align' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.align.i32() + declare i64 @llvm.coro.align.i64() + +Overview: +""""""""" + +The '``llvm.coro.align``' intrinsic returns the alignment of the coroutine frame +in bytes. This is only supported for switched-resume coroutines. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.align` intrinsic is lowered to a constant representing the alignment +of the coroutine frame. + +.. _coro.raw.frame.ptr.offset: + +'llvm.coro.raw.frame.ptr.offset' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.raw.frame.ptr.offset.i32() + declare i64 @llvm.coro.raw.frame.ptr.offset.i64() + +Overview: +""""""""" + +The '``llvm.coro.raw.frame.ptr.offset``' intrinsic returns the byte offset of +the raw memory block address (returned by the allocator) in the coroutine frame. +The returned value is only valid when the coroutine frame is overaligned. +This is only supported for switched-resume coroutines. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.raw.frame.ptr.offset` intrinsic is lowered to a constant representing +the byte offset of the raw memory block address in the coroutine frame. + .. 
_coro.begin: 'llvm.coro.begin' Intrinsic diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1237,6 +1237,8 @@ def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_raw_frame_ptr_offset : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -486,6 +486,8 @@ return StructAlign; } + SmallVector &getFields() { return Fields; } + FieldIDType getLayoutFieldIndex(FieldIDType Id) const { assert(IsFinished && "not yet finished!"); return Fields[Id].LayoutFieldIndex; @@ -764,21 +766,49 @@ // Because multiple allocas may own the same field slot, // we add allocas to field here. B.addFieldForAllocas(F, FrameData, Shape); - // Add PromiseAlloca to Allocas list so that - // 1. updateLayoutIndex could update its index after - // `performOptimizedStructLayout` - // 2. it is processed in insertSpills. - if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) - // We assume that the promise alloca won't be modified before - // CoroBegin and no alias will be create before CoroBegin. - FrameData.Allocas.emplace_back( - PromiseAlloca, DenseMap>{}, false); + // Create an entry for every spilled value. 
for (auto &S : FrameData.Spills) { FieldIDType Id = B.addField(S.first->getType(), None); FrameData.setFieldIndex(S.first, Id); } + Optional FramePtrField = None; + if (Shape.ABI == coro::ABI::Switch) { + // Add PromiseAlloca to Allocas list so that + // 1. updateLayoutIndex could update its index after + // `performOptimizedStructLayout` + // 2. it is processed in insertSpills. + if (PromiseAlloca) + // We assume that the promise alloca won't be modified before + // CoroBegin and no alias will be created before CoroBegin. + FrameData.Allocas.emplace_back( + PromiseAlloca, DenseMap>{}, + false); + + Align FrameAlign = + std::max_element( + B.getFields().begin(), B.getFields().end(), + [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; }) + ->Alignment; + + // Check for over-alignment against the alignment recorded on the coro id. + unsigned NewAlign = Shape.getSwitchCoroId()->getAlignment(); + if (NewAlign && FrameAlign > NewAlign) { + BasicBlock &Entry = F.getEntryBlock(); + IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt()); + + // Reserve frame space for the raw frame pointer. + Value *Mem = Shape.CoroBegin->getMem(); + AllocaInst *FramePtrAddr = + Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr"); + FramePtrField = B.addFieldForAlloca(FramePtrAddr); + FrameData.setFieldIndex(FramePtrAddr, *FramePtrField); + FrameData.Allocas.emplace_back( + FramePtrAddr, DenseMap>{}, true); + } + } + B.finish(FrameTy); FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); @@ -790,6 +820,12 @@ Shape.SwitchLowering.IndexField = B.getLayoutFieldIndex(*SwitchIndexFieldId); + if (FramePtrField) { + FieldIDType FieldIdx = B.getLayoutFieldIndex(*FramePtrField); + Shape.SwitchLowering.FramePtrOffset = + DL.getStructLayout(FrameTy)->getElementOffset(FieldIdx); + } + // Also round the frame size up to a multiple of its alignment, as is // generally expected in C/C++. 
Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -121,6 +122,10 @@ : cast(Arg->stripPointerCasts()); } + unsigned getAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + void clearPromise() { Value *Arg = getArgOperand(PromiseArg); setArgOperand(PromiseArg, @@ -599,6 +604,30 @@ } }; +/// This represents the llvm.coro.align instruction (frame alignment in bytes). +class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_align; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.raw.frame.ptr.offset instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroRawFramePtrOffsetInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_raw_frame_ptr_offset; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -99,6 +99,8 @@ CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; + SmallVector CoroAligns; + SmallVector CoroRawFramePtrOffsets; SmallVector CoroSuspends; SmallVector SwiftErrorOps; @@ -132,6 +134,7 @@ AllocaInst *PromiseAlloca; BasicBlock *ResumeEntryBlock; unsigned IndexField; + unsigned FramePtrOffset; bool HasFinalSuspend; }; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -997,23 +997,49 @@ Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } -static void replaceFrameSize(coro::Shape &Shape) { +/// Replace every recorded coro.size, coro.align and coro.raw.frame.ptr.offset +/// call in \p Shape with its constant value and erase the call. +static void replaceFrameSizeAndAlign(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); - if (Shape.CoroSizes.empty()) - return; + if (!Shape.CoroSizes.empty()) { + // In the same function all coro.sizes should have the same result type. 
+ auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + + for (CoroSizeInst *CS : Shape.CoroSizes) { + CS->replaceAllUsesWith(SizeConstant); + CS->eraseFromParent(); + } + } - // In the same function all coro.sizes should have the same result type. - auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + if (!Shape.CoroAligns.empty()) { + auto *Intrin = Shape.CoroAligns.back(); + auto *AlignConstant = + ConstantInt::get(Intrin->getType(), Shape.FrameAlign.value()); - for (CoroSizeInst *CS : Shape.CoroSizes) { - CS->replaceAllUsesWith(SizeConstant); - CS->eraseFromParent(); + for (CoroAlignInst *CS : Shape.CoroAligns) { + CS->replaceAllUsesWith(AlignConstant); + CS->eraseFromParent(); + } + } + + if (!Shape.CoroRawFramePtrOffsets.empty()) { + // FramePtrOffset is only set up for the switched-resume lowering. + assert(Shape.ABI == coro::ABI::Switch && + "coro.raw.frame.ptr.offset requires the switched-resume ABI"); + auto *Intrin = Shape.CoroRawFramePtrOffsets.back(); + auto *FramePtrOffset = ConstantInt::get( + Intrin->getType(), Shape.SwitchLowering.FramePtrOffset); + + for (CoroRawFramePtrOffsetInst *CS : Shape.CoroRawFramePtrOffsets) { + CS->replaceAllUsesWith(FramePtrOffset); + CS->eraseFromParent(); + } } } @@ -1748,7 +1774,7 @@ simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape); - replaceFrameSize(Shape); + replaceFrameSizeAndAlign(Shape); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. 
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -234,6 +234,8 @@ Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); Shape.CoroSizes.clear(); + Shape.CoroAligns.clear(); + Shape.CoroRawFramePtrOffsets.clear(); Shape.CoroSuspends.clear(); Shape.FrameTy = nullptr; @@ -268,6 +270,12 @@ case Intrinsic::coro_size: CoroSizes.push_back(cast(II)); break; + case Intrinsic::coro_align: + CoroAligns.push_back(cast(II)); + break; + case Intrinsic::coro_raw_frame_ptr_offset: + CoroRawFramePtrOffsets.push_back(cast(II)); + break; case Intrinsic::coro_frame: CoroFrames.push_back(cast(II)); break; @@ -375,6 +383,7 @@ this->SwitchLowering.ResumeSwitch = nullptr; this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); this->SwitchLowering.ResumeEntryBlock = nullptr; + this->SwitchLowering.FramePtrOffset = 0; for (auto AnySuspend : CoroSuspends) { auto Suspend = dyn_cast(AnySuspend); diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll @@ -62,10 +62,10 @@ call i1 @llvm.coro.end(i8* null, i1 false) ret void } -; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", %struct.big_structure, i1, [26 x i8], %struct.big_structure.2 } +; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", %struct.big_structure, i1, i8*, [16 x i8], %struct.big_structure.2 } ; CHECK-LABEL: @a.resume( ; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 3 -; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 6 +; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 7 declare token 
@llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) declare i1 @llvm.coro.alloc(token) #3 diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll @@ -62,7 +62,7 @@ call i1 @llvm.coro.end(i8* null, i1 false) ret void } -; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", i1, [14 x i8], %struct.big_structure } +; CHECK: %a.Frame = type { void (%a.Frame*)*, void (%a.Frame*)*, %"struct.task::promise_type", i1, i8*, %struct.big_structure } ; CHECK-LABEL: @a.resume( ; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 3 ; CHECK: %[[A:.*]] = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr, i32 0, i32 5