diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4430,11 +4430,24 @@ return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); case Builtin::BI__builtin_coro_size: { - auto & Context = getContext(); + auto &Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); - return RValue::get(Builder.CreateCall(F)); + Function *CoroSize = CGM.getIntrinsic(Intrinsic::coro_size, T); + // Over-allocate to handle overaligned coro frame. + Function *CoroAlign = CGM.getIntrinsic(Intrinsic::coro_align, T); + auto &TI = CGM.getContext().getTargetInfo(); + unsigned AlignOfNew = TI.getNewAlign() / TI.getCharWidth(); + Value *SizeCall = Builder.CreateCall(CoroSize); + Value *AlignCall = Builder.CreateCall(CoroAlign); + // int x = coro_align - AlignOfNew; + // coro_size + (x > 0 ? x : 0) + Value *Diff = + Builder.CreateNSWSub(AlignCall, ConstantInt::get(T, AlignOfNew, true)); + Value *Zero = ConstantInt::getSigned(T, 0); + Value *Cmp = Builder.CreateICmp(llvm::CmpInst::ICMP_SGT, Diff, Zero); + Value *Extra = Builder.CreateSelect(Cmp, Diff, Zero); + return RValue::get(Builder.CreateAdd(SizeCall, Extra)); } case Builtin::BI__builtin_coro_id: diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -548,7 +548,7 @@ auto *EntryBB = Builder.GetInsertBlock(); auto *AllocBB = createBasicBlock("coro.alloc"); // Align to overaligned boundary in this block. The over-allocation is - // handled by aligning up frame's store size. + // handled during lowering of BI__builtin_coro_size. 
auto *AlignAllocBB = createBasicBlock("coro.alloc.align"); auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -60,9 +60,14 @@ // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]] // CHECK: [[AllocBB]]: - // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) - // CHECK: br label %[[AlignAllocBB:.+]] + // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK-NEXT: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK-NEXT: %[[CMP:.+]] = icmp sgt i64 %[[DIFF]], 0 + // CHECK-NEXT: %[[SEL:.+]] = select i1 %[[CMP]], i64 %[[DIFF]], i64 0 + // CHECK-NEXT: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[SEL]] + // CHECK-NEXT: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) + // CHECK-NEXT: br label %[[AlignAllocBB:.+]] // CHECK: [[AlignAllocBB]]: // CHECK: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() @@ -104,7 +109,8 @@ extern "C" void f1(promise_new_tag ) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -133,10 +139,11 @@ // CHECK: store double %z, double* %z.addr, align 8 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 
@llvm.coro.size.i64() + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4 // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4 // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8 - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[NEWSIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) co_return; } @@ -183,7 +190,8 @@ extern "C" void f2(promise_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -208,12 +216,14 @@ extern "C" void f3(promise_sized_delete_tag) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NEWSIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) + // CHECK: %[[NEWSIZE2:.+]] = add i64 %[[SIZE2]], %{{.+}} + // CHECK: call void 
@_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[NEWSIZE2]]) co_return; } @@ -236,7 +246,8 @@ // CHECK: %[[Gro:.+]] = alloca i32 // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) + // CHECK: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %{{.+}} + // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[NEWSIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] diff --git a/clang/test/CodeGenCoroutines/coro-builtins.c b/clang/test/CodeGenCoroutines/coro-builtins.c --- a/clang/test/CodeGenCoroutines/coro-builtins.c +++ b/clang/test/CodeGenCoroutines/coro-builtins.c @@ -21,7 +21,12 @@ __builtin_coro_noop(); // CHECK-NEXT: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK-NEXT: %[[MEM:.+]] = call i8* @myAlloc(i64 %[[SIZE]]) + // CHECK-NEXT: %[[ALIGN:.+]] = call i64 @llvm.coro.align.i64() + // CHECK-NEXT: %[[DIFF:.+]] = sub nsw i64 %[[ALIGN]], 16 + // CHECK-NEXT: %[[CMP:.+]] = icmp sgt i64 %[[DIFF]], 0 + // CHECK-NEXT: %[[SEL:.+]] = select i1 %[[CMP]], i64 %[[DIFF]], i64 0 + // CHECK-NEXT: %[[NEWSIZE:.+]] = add i64 %[[SIZE]], %[[SEL]] + // CHECK-NEXT: %[[MEM:.+]] = call i8* @myAlloc(i64 %[[NEWSIZE]]) // CHECK-NEXT: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[COROID]], i8* %[[MEM]]) __builtin_coro_begin(myAlloc(__builtin_coro_size())); diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -49,7 +49,8 @@ // CHECK: %[[GroActive:.+]] = alloca i1 // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() 
- // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]]) + // CHECK: %[[NewSize:.+]] = add i64 %[[Size]], %{{.+}} + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[NewSize]]) // CHECK: store i1 false, i1* %[[GroActive]] // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev( // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv( diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1004,14 +1004,10 @@ if (!Shape.CoroSizes.empty()) { // In the same function all coro.sizes should have the same result type. auto *SizeIntrin = Shape.CoroSizes.back(); - auto *SizeConstant = - ConstantInt::get(SizeIntrin->getType(), Shape.FrameSize); - assert( - Shape.FrameSize == - alignTo(SizeIntrin->getModule()->getDataLayout().getTypeStoreSize( - Shape.FrameTy), - Align(Shape.FrameAlign)) && - "FrameSize should already be aligned up to FrameAlign"); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); for (CoroSizeInst *CS : Shape.CoroSizes) { CS->replaceAllUsesWith(SizeConstant);