diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -103,21 +103,12 @@ // Given a resume function @f.resume(%f.frame* %frame), returns the size // and expected alignment of %f.frame type. -static std::pair getFrameLayout(Function *Resume) { - // Prefer to pull information from the function attributes. +static Optional> getFrameLayout(Function *Resume) { + // Pull information from the function attributes. auto Size = Resume->getParamDereferenceableBytes(0); - auto Align = Resume->getParamAlign(0); - - // If those aren't given, extract them from the type. - if (Size == 0 || !Align) { - auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType(); - - const DataLayout &DL = Resume->getParent()->getDataLayout(); - if (!Size) Size = DL.getTypeAllocSize(FrameTy); - if (!Align) Align = DL.getABITypeAlign(FrameTy); - } - - return std::make_pair(Size, *Align); + if (!Size) + return None; + return std::make_pair(Size, Resume->getParamAlign(0).valueOrOne()); } // Finds first non alloca instruction in the entry block of a function. @@ -361,17 +352,19 @@ replaceWithConstant(DestroyAddrConstant, It.second); if (ShouldElide) { - auto FrameSizeAndAlign = getFrameLayout(cast(ResumeAddrConstant)); - elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, - FrameSizeAndAlign.second, AA); - coro::replaceCoroFree(CoroId, /*Elide=*/true); - NumOfCoroElided++; + if (auto FrameSizeAndAlign = + getFrameLayout(cast(ResumeAddrConstant))) { + elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign->first, + FrameSizeAndAlign->second, AA); + coro::replaceCoroFree(CoroId, /*Elide=*/true); + NumOfCoroElided++; #ifndef NDEBUG - if (!CoroElideInfoOutputFilename.empty()) - *getOrCreateLogFile() - << "Elide " << CoroId->getCoroutine()->getName() << " in " - << CoroId->getFunction()->getName() << "\n"; + if (!CoroElideInfoOutputFilename.empty()) + *getOrCreateLogFile() + << "Elide " << CoroId->getCoroutine()->getName() << " in " + << CoroId->getFunction()->getName() << "\n"; #endif + } } return true; diff --git a/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll --- a/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll @@ -13,7 +13,7 @@ @"bar.resumers" = private constant [3 x void (%"bar.Frame"*)*] [void (%"bar.Frame"*)* @"bar.resume", void (%"bar.Frame"*)* undef, void (%"bar.Frame"*)* undef] declare dso_local void @"bar"() align 2 -declare dso_local fastcc void @"bar.resume"(%"bar.Frame"*) align 2 +declare dso_local fastcc void @"bar.resume"(%"bar.Frame"* align 8 dereferenceable(24)) align 2 ; There is a musttail call. ; With alias analysis, we can tell that the frame does not interfere with CALL34, and hence we can keep the tailcalls. diff --git a/llvm/test/Transforms/Coroutines/coro-elide-stat.ll b/llvm/test/Transforms/Coroutines/coro-elide-stat.ll --- a/llvm/test/Transforms/Coroutines/coro-elide-stat.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-stat.ll @@ -17,7 +17,7 @@ declare void @print(i32) nounwind ; resume part of the coroutine -define fastcc void @f.resume(i8*) { +define fastcc void @f.resume(i8* dereferenceable(1)) { tail call void @print(i32 0) ret void } diff --git a/llvm/test/Transforms/Coroutines/coro-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide.ll --- a/llvm/test/Transforms/Coroutines/coro-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide.ll @@ -7,7 +7,7 @@ declare void @print(i32) nounwind ; resume part of the coroutine -define fastcc void @f.resume(i8*) { +define fastcc void @f.resume(i8* dereferenceable(1)) { tail call void @print(i32 0) ret void } diff --git a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll --- a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll @@ -11,7 +11,7 @@ declare void @bar(i8*) -declare fastcc void @f.resume(%f.frame*) +declare fastcc void @f.resume(%f.frame* align 4 dereferenceable(4)) declare fastcc void @f.destroy(%f.frame*) declare fastcc void @f.cleanup(%f.frame*)