diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4430,11 +4430,24 @@ return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); case Builtin::BI__builtin_coro_size: { - auto & Context = getContext(); + auto &Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); - return RValue::get(Builder.CreateCall(F)); + Function *CoroSize = CGM.getIntrinsic(Intrinsic::coro_size, T); + // Over-allocate to handle overaligned coro frame. + Function *CoroAlign = CGM.getIntrinsic(Intrinsic::coro_align, T); + const auto &TI = CGM.getContext().getTargetInfo(); + unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth(); + Value *SizeCall = Builder.CreateCall(CoroSize); + Value *AlignCall = Builder.CreateCall(CoroAlign); + // int x = coro_align - AlignOfNew; + // coro_size + (x > 0 ? 
x : 0) + Value *Diff = + Builder.CreateNSWSub(AlignCall, ConstantInt::get(T, AlignOfNew, true)); + Value *Zero = ConstantInt::getSigned(T, 0); + Value *Cmp = Builder.CreateICmp(llvm::CmpInst::ICMP_SGT, Diff, Zero); + Value *Extra = Builder.CreateSelect(Cmp, Diff, Zero); + return RValue::get(Builder.CreateAdd(SizeCall, Extra)); } case Builtin::BI__builtin_coro_id: @@ -16724,8 +16737,7 @@ } } -namespace { -struct BuiltinAlignArgs { +struct CodeGenFunction::BuiltinAlignArgs { llvm::Value *Src = nullptr; llvm::Type *SrcType = nullptr; llvm::Value *Alignment = nullptr; @@ -16752,8 +16764,24 @@ auto *One = llvm::ConstantInt::get(IntType, 1); Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); } + + BuiltinAlignArgs(llvm::Value *SrcV, llvm::Value *Align, + CodeGenFunction &CGF) { + Src = SrcV; + SrcType = Src->getType(); + if (SrcType->isPointerTy()) { + IntType = IntegerType::get( + CGF.getLLVMContext(), + CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); + } else { + assert(SrcType->isIntegerTy()); + IntType = cast<llvm::IntegerType>(SrcType); + } + Alignment = Align; + auto *One = llvm::ConstantInt::get(IntType, 1); + Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); + } }; -} // namespace /// Generate (x & (y-1)) == 0. RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) { @@ -16767,12 +16795,8 @@ llvm::Constant::getNullValue(Args.IntType), "is_aligned")); } -/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. -/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the -/// llvm.ptrmask instrinsic (with a GEP before in the align_up case). -/// TODO: actually use ptrmask once most optimization passes know about it.
-RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { - BuiltinAlignArgs Args(E, *this); +llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(const BuiltinAlignArgs &Args, + const Expr *E, bool AlignUp) { llvm::Value *SrcAddr = Args.Src; if (Args.Src->getType()->isPointerTy()) SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr"); @@ -16811,7 +16835,23 @@ emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); } assert(Result->getType() == Args.SrcType); - return RValue::get(Result); + return Result; +} + +/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. +/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the +/// llvm.ptrmask intrinsic (with a GEP before in the align_up case). +/// TODO: actually use ptrmask once most optimization passes know about it. +RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { + BuiltinAlignArgs Args(E, *this); + return RValue::get(EmitBuiltinAlignTo(Args, E, AlignUp)); +} + +llvm::Value *CodeGenFunction::EmitBuiltinAlignTo(llvm::Value *Src, + llvm::Value *Align, + const Expr *E, bool AlignUp) { + BuiltinAlignArgs Args(Src, Align, *this); + return EmitBuiltinAlignTo(Args, E, AlignUp); } Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -547,6 +547,9 @@ auto *EntryBB = Builder.GetInsertBlock(); auto *AllocBB = createBasicBlock("coro.alloc"); + // Align to overaligned boundary in this block. The over-allocation is + // performed during lowering of BI__builtin_coro_size.
+ auto *AlignAllocBB = createBasicBlock("coro.alloc.align"); auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); auto *RetBB = createBasicBlock("coro.ret"); @@ -566,7 +569,6 @@ EmitBlock(AllocBB); auto *AllocateCall = EmitScalarExpr(S.getAllocate()); - auto *AllocOrInvokeContBB = Builder.GetInsertBlock(); // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { @@ -575,22 +577,32 @@ // See if allocation was successful. auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); - Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); + Builder.CreateCondBr(Cond, AlignAllocBB, RetOnFailureBB); // If not, return OnAllocFailure object. EmitBlock(RetOnFailureBB); EmitStmt(RetOnAllocFailure); } else { - Builder.CreateBr(InitBB); + Builder.CreateBr(AlignAllocBB); } + EmitBlock(AlignAllocBB); + + auto *IntType = llvm::IntegerType::get( + getLLVMContext(), + CGM.getDataLayout().getIndexTypeSizeInBits(AllocateCall->getType())); + auto *CoroAlign = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_align, IntType)); + AllocateCall = + EmitBuiltinAlignTo(AllocateCall, CoroAlign, S.getAllocate(), true); + EmitBlock(InitBB); // Pass the result of the allocation to coro.begin. 
auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); Phi->addIncoming(NullPtr, EntryBB); - Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + Phi->addIncoming(AllocateCall, AlignAllocBB); auto *CoroBegin = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); CurCoro.Data->CoroBegin = CoroBegin; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1885,6 +1885,10 @@ void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); + struct BuiltinAlignArgs; + llvm::Value *EmitBuiltinAlignTo(const BuiltinAlignArgs &Args, const Expr *E, + bool AlignUp); + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -4013,6 +4017,8 @@ RValue EmitBuiltinIsAligned(const CallExpr *E); /// Emit IR for __builtin_align_up/__builtin_align_down. RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp); + llvm::Value *EmitBuiltinAlignTo(llvm::Value *Src, llvm::Value *Align, + const Expr *E, bool AlignUp); llvm::Function *generateBuiltinOSLogHelperFunction( const analyze_os_log::OSLogBufferLayout &Layout, diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -60,12 +60,23 @@ // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]] // CHECK: [[AllocBB]]: - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK-NEXT: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK-NEXT: %[[CMP:.+]] = icmp sgt i64 %[[DIFF]], 0 + // CHECK-NEXT: %[[SEL:.+]] = select i1 %[[CMP]], i64 %[[DIFF]], i64 0 + // CHECK-NEXT: 
%[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[SEL]] + // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) + // CHECK-NEXT: br label %[[AlignAllocBB:.+]] + + // CHECK: [[AlignAllocBB]]: + // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK: %[[ALIGNED:.+]] = getelementptr inbounds i8, i8* %[[MEM]] + // CHECK: call void @llvm.assume(i1 true) [ "align"(i8* %[[ALIGNED]], i64 %[[ALIGN]]) ] // CHECK: br label %[[InitBB]] // CHECK: [[InitBB]]: - // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ] + // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %[[ALIGNED]], %[[AlignAllocBB]] ] // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]]) // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -98,7 +109,8 @@ extern "C" void f1(promise_new_tag ) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -127,10 +139,11 @@ // CHECK: store double %z, double* %z.addr, align 8 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4 // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4 // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8 - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], 
double %[[DOUBLE]]) + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[NEWSIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) co_return; } @@ -177,7 +190,8 @@ extern "C" void f2(promise_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -202,12 +216,14 @@ extern "C" void f3(promise_sized_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) + // CHECK: %[[NEWSIZE2:.+]] = add i64 %[[SIZE2]], %{{.+}} + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[NEWSIZE2]]) co_return; } @@ -230,7 +246,8 @@ // CHECK: %[[Gro:.+]] = alloca i32 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} 
+ // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] diff --git a/clang/test/CodeGenCoroutines/coro-builtins.c b/clang/test/CodeGenCoroutines/coro-builtins.c --- a/clang/test/CodeGenCoroutines/coro-builtins.c +++ b/clang/test/CodeGenCoroutines/coro-builtins.c @@ -21,7 +21,12 @@ __builtin_coro_noop(); // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK-NEXT: %[[MEM:.+]] = call i8* @myAlloc(i64 %[[SIZE]]) + // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK-NEXT: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK-NEXT: %[[CMP:.+]] = icmp sgt i64 %[[DIFF]], 0 + // CHECK-NEXT: %[[SEL:.+]] = select i1 %[[CMP]], i64 %[[DIFF]], i64 0 + // CHECK-NEXT: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[SEL]] + // CHECK-NEXT: %[[MEM:.+]] = call i8* @myAlloc(i64 %[[NEWSIZE]]) // CHECK-NEXT: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[COROID]], i8* %[[MEM]]) __builtin_coro_begin(myAlloc(__builtin_coro_size())); diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -49,7 +49,8 @@ // CHECK: %[[GroActive:.+]] = alloca i1 // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]]) + // CHECK: %[[NewSize:.+]] = add i64 %[[Size]], %{{.+}} + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NewSize]]) // CHECK: store i1 false, i1* %[[GroActive]] // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev( // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv( diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst --- a/llvm/docs/Coroutines.rst +++ 
b/llvm/docs/Coroutines.rst @@ -948,6 +948,32 @@ The `coro.size` intrinsic is lowered to a constant representing the size of the coroutine frame. +.. _coro.align: + +'llvm.coro.align' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare i32 @llvm.coro.align.i32() + declare i64 @llvm.coro.align.i64() + +Overview: +""""""""" + +The '``llvm.coro.align``' intrinsic returns the alignment of the coroutine frame +in bytes. This is only supported for switched-resume coroutines. + +Arguments: +"""""""""" + +None + +Semantics: +"""""""""" + +The `coro.align` intrinsic is lowered to a constant representing the alignment +of the coroutine frame. + .. _coro.begin: 'llvm.coro.begin' Intrinsic diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1237,6 +1237,7 @@ def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -599,6 +599,18 @@ } }; +/// This represents the llvm.coro.align instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_align; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst { enum { FrameArg, UnwindArg }; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -99,6 +99,7 @@ CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; + SmallVector CoroAligns; SmallVector CoroSuspends; SmallVector SwiftErrorOps; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -997,23 +997,33 @@ Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } -static void replaceFrameSize(coro::Shape &Shape) { +static void replaceFrameSizeAndAlign(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); - if (Shape.CoroSizes.empty()) - return; + if (!Shape.CoroSizes.empty()) { + // In the same function all coro.sizes should have the same result type. + auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + + for (CoroSizeInst *CS : Shape.CoroSizes) { + CS->replaceAllUsesWith(SizeConstant); + CS->eraseFromParent(); + } + } - // In the same function all coro.sizes should have the same result type.
- auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + if (!Shape.CoroAligns.empty()) { + auto *Intrin = Shape.CoroAligns.back(); + auto *AlignConstant = + ConstantInt::get(Intrin->getType(), Shape.FrameAlign.value()); - for (CoroSizeInst *CS : Shape.CoroSizes) { - CS->replaceAllUsesWith(SizeConstant); - CS->eraseFromParent(); + for (CoroAlignInst *CS : Shape.CoroAligns) { + CS->replaceAllUsesWith(AlignConstant); + CS->eraseFromParent(); + } } } @@ -1748,7 +1758,7 @@ simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape); - replaceFrameSize(Shape); + replaceFrameSizeAndAlign(Shape); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -234,6 +234,7 @@ Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); Shape.CoroSizes.clear(); + Shape.CoroAligns.clear(); Shape.CoroSuspends.clear(); Shape.FrameTy = nullptr; @@ -268,6 +269,9 @@ case Intrinsic::coro_size: CoroSizes.push_back(cast<CoroSizeInst>(II)); break; + case Intrinsic::coro_align: + CoroAligns.push_back(cast<CoroAlignInst>(II)); + break; case Intrinsic::coro_frame: CoroFrames.push_back(cast<CoroFrameInst>(II)); break;