Index: llvm/include/llvm/Transforms/Coroutines.h =================================================================== --- llvm/include/llvm/Transforms/Coroutines.h +++ llvm/include/llvm/Transforms/Coroutines.h @@ -11,6 +11,8 @@ #ifndef LLVM_TRANSFORMS_COROUTINES_H #define LLVM_TRANSFORMS_COROUTINES_H +#include "llvm/Passes/PassBuilder.h" + namespace llvm { class Pass; @@ -23,7 +25,8 @@ Pass *createCoroEarlyLegacyPass(); /// Split up coroutines into multiple functions driving their state machines. -Pass *createCoroSplitLegacyPass(); +Pass *createCoroSplitLegacyPass( + PassBuilder::OptimizationLevel Level = PassBuilder::OptimizationLevel::O0); /// Analyze coroutines use sites, devirtualize resume/destroy calls and elide /// heap allocation for coroutine frame where possible. Index: llvm/include/llvm/Transforms/Coroutines/CoroSplit.h =================================================================== --- llvm/include/llvm/Transforms/Coroutines/CoroSplit.h +++ llvm/include/llvm/Transforms/Coroutines/CoroSplit.h @@ -18,13 +18,19 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" namespace llvm { struct CoroSplitPass : PassInfoMixin { + CoroSplitPass() : OptLevel(PassBuilder::OptimizationLevel::O0) {} + CoroSplitPass(PassBuilder::OptimizationLevel OptLevel) : OptLevel(OptLevel) {} + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); static bool isRequired() { return true; } + + PassBuilder::OptimizationLevel OptLevel; }; } // end namespace llvm Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -878,7 +878,7 @@ MainCGPipeline.addPass(AttributorCGSCCPass()); if (PTO.Coroutines) - MainCGPipeline.addPass(CoroSplitPass()); + MainCGPipeline.addPass(CoroSplitPass(Level)); // Now deduce any function attributes based in the current code. 
MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -1913,6 +1913,34 @@ return Result; } +Expected +parseOptLevelOptions(StringRef Params) { + PassBuilder::OptimizationLevel Result = PassBuilder::OptimizationLevel::O0; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + if (ParamName == "O0") { + Result = PassBuilder::OptimizationLevel::O0; + } else if (ParamName == "O1") { + Result = PassBuilder::OptimizationLevel::O1; + } else if (ParamName == "O2") { + Result = PassBuilder::OptimizationLevel::O2; + } else if (ParamName == "O3") { + Result = PassBuilder::OptimizationLevel::O3; + } else if (ParamName == "Os") { + Result = PassBuilder::OptimizationLevel::Os; + } else if (ParamName == "Oz") { + Result = PassBuilder::OptimizationLevel::Oz; + } else { + return make_error( + formatv("invalid OptLevel parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline @@ -1987,6 +2015,9 @@ #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) \ + return true; #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ return true; @@ -2245,6 +2276,15 @@ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \ return Error::success(); \ } +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + MPM.addPass( \ + createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS(Params.get()))); \ + return Error::success(); \ + } #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \ @@ -2344,6 +2384,14 @@ CGPM.addPass(CREATE_PASS); \ return Error::success(); \ } +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + CGPM.addPass(CREATE_PASS(Params.get())); \ + return Error::success(); \ + } #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ CGPM.addPass(RequireAnalysisPass< \ Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -119,10 +119,17 @@ CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass()) CGSCC_PASS("inline", InlinerPass()) CGSCC_PASS("openmpopt", OpenMPOptPass()) -CGSCC_PASS("coro-split", CoroSplitPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS +#ifndef CGSCC_PASS_WITH_PARAMS +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) +#endif +CGSCC_PASS_WITH_PARAMS("coro-split", [](PassBuilder::OptimizationLevel OptLevel){ + return CoroSplitPass(OptLevel); + }, parseOptLevelOptions) +#undef CGSCC_PASS_WITH_PARAMS + #ifndef FUNCTION_ANALYSIS #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) #endif Index: llvm/lib/Transforms/Coroutines/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Coroutines/CMakeLists.txt +++ llvm/lib/Transforms/Coroutines/CMakeLists.txt @@ -8,4 +8,5 @@ DEPENDS intrinsics_gen + LLVMAnalysis ) 
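With the registration above, the optimization level becomes a textual pass parameter: parseOptLevelOptions accepts O0, O1, O2, O3, Os and Oz, splits multiple values on ';' with the last one winning, and defaults to O0. A pipeline string such as -passes='coro-split<O2>' (wrapped into a CGSCC adaptor by the MODULE handling above, or spelled explicitly as cgscc(coro-split<O2>)) should therefore construct CoroSplitPass with OptimizationLevel::O2. The sketch below only illustrates the programmatic equivalent; buildCoroCGSCCPipeline is a hypothetical helper, not part of this patch:

  // A minimal sketch, assuming this patch is applied; error handling elided.
  #include "llvm/Analysis/CGSCCPassManager.h"
  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Coroutines/CoroSplit.h"

  using namespace llvm;

  CGSCCPassManager buildCoroCGSCCPipeline(PassBuilder::OptimizationLevel Level) {
    CGSCCPassManager CGPM;
    // CoroSplitPass::isRequired() keeps the pass running even at O0; the level
    // only decides (via buildCoroutineFrame) whether non-overlapping allocas
    // may share a frame slot.
    CGPM.addPass(CoroSplitPass(Level));
    return CGPM;
  }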
Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/StackLifetime.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DIBuilder.h" @@ -28,8 +29,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/circular_raw_ostream.h" #include "llvm/Support/OptimizedStructLayout.h" +#include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -41,6 +42,13 @@ // "coro-frame", which results in leaner debug spew. #define DEBUG_TYPE "coro-suspend-crossing" +static cl::opt EnableReuseStorageInFrame( + "reuse-storage-in-coroutine-frame", cl::Hidden, + cl::desc( + "Enable the optimization which would reuse the storage in the coroutine \ + frame for allocas whose liferanges are not overlapped, for testing purposes"), + llvm::cl::init(false)); + enum { SmallVectorThreshold = 32 }; // Provides two way mapping between the blocks and numbers. @@ -337,6 +345,74 @@ #endif namespace { +/// We want to put the allocas whose lifetime-ranges are not overlapped +/// into one slot of coroutine frame. +/// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566 +/// +/// cppcoro::task alternative_paths(bool cond) { +/// if (cond) { +/// big_structure a; +/// process(a); +/// co_await something(); +/// } else { +/// big_structure b; +/// process2(b); +/// co_await something(); +/// } +/// } +/// +/// We want to put variable a and variable b in the same slot to +/// reduce the size of coroutine frame. +/// +/// This class use StackLifetime algorithm to partition the AllocaInsts in +/// Spills to non-overlapped sets in order to put Alloca in the same +/// non-overlapped set into the same slot in the Coroutine Frame. Then +/// `FrameTypeBuilder::addFieldForAllocas` function would add field for the +/// allocas in the same non-overlapped set. +/// +/// Side Effects: Because We sort the allocas, the order of allocas in the frame +/// may be different with the order in the source code. 
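A compact, self-contained sketch of the partitioning strategy this comment describes may make the class below easier to follow: sort the allocas by size, then greedily place each one into the first existing set none of whose members' live ranges overlap it, and open a new set otherwise. Item and Overlaps are stand-ins for AllocaInst and StackLifetime's live-range query, introduced purely for illustration; this is not the patch's code.

  #include <algorithm>
  #include <cstdint>
  #include <functional>
  #include <vector>

  struct Item {
    uint64_t SizeInBits;
  };
  using OverlapFn = std::function<bool(const Item &, const Item &)>;

  // Greedy first-fit partitioning into non-overlapping sets.
  std::vector<std::vector<Item>>
  partitionNonOverlapping(std::vector<Item> Items, const OverlapFn &Overlaps) {
    // Larger items first: big allocas get first pick of a shared slot, and the
    // first element of each resulting set is therefore its largest member.
    std::sort(Items.begin(), Items.end(), [](const Item &A, const Item &B) {
      return A.SizeInBits > B.SizeInBits;
    });
    std::vector<std::vector<Item>> Sets;
    for (const Item &I : Items) {
      auto CanJoin = [&](const std::vector<Item> &Set) {
        return std::none_of(Set.begin(), Set.end(), [&](const Item &Other) {
          return Overlaps(I, Other);
        });
      };
      auto It = std::find_if(Sets.begin(), Sets.end(), CanJoin);
      if (It != Sets.end())
        It->push_back(I);    // reuse this set's slot
      else
        Sets.push_back({I}); // start a new frame slot
    }
    return Sets;
  }

Each resulting set then corresponds to a single frame field typed after its largest member, which is what FrameTypeBuilder::addFieldForAllocas does further down.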
+class NonOverlappedAllocaInfo { +public: + using AllocaSetType = SmallVector; + +private: + const Function &F; + SpillInfo &Spills; + coro::Shape &Shape; + bool ReuseStorageInFrame = false; + + DenseMap AllocaIndex; + DenseMap SpillOfAllocas; + SmallVector NonOverlapedAllocas; + +public: + NonOverlappedAllocaInfo(const Function &F, SpillInfo &Spills, + coro::Shape &Shape, + PassBuilder::OptimizationLevel OptLevel) + : F(F), Spills(Spills), Shape(Shape) { + ReuseStorageInFrame = (OptLevel != PassBuilder::OptimizationLevel::O0) || + EnableReuseStorageInFrame; + } + + void run(); + + AllocaSetType &getAllocaSet(AllocaInst *AI) { + assert(AllocaIndex.count(AI)); + auto Index = AllocaIndex[AI]; + assert(Index < NonOverlapedAllocas.size()); + return NonOverlapedAllocas[Index]; + } + + auto begin() { return NonOverlapedAllocas.begin(); } + auto end() { return NonOverlapedAllocas.end(); } + + Spill *getSpill(AllocaInst *AI) { + assert(SpillOfAllocas.count(AI)); + return SpillOfAllocas[AI]; + } +}; + // We cannot rely solely on natural alignment of a type when building a // coroutine frame and if the alignment specified on the Alloca instruction // differs from the natural alignment of the alloca type we will need to insert @@ -361,9 +437,16 @@ SmallVector Fields; DenseMap FieldIndexByKey; + NonOverlappedAllocaInfo AllocaLifetimeInfo; + public: - FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) - : DL(DL), Context(Context) {} + FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL, + const Function &F, SpillInfo &Spills, coro::Shape &Shape, + PassBuilder::OptimizationLevel OptLevel) + : DL(DL), Context(Context), + AllocaLifetimeInfo(F, Spills, Shape, OptLevel) { + AllocaLifetimeInfo.run(); + } class FieldId { size_t Value; @@ -389,6 +472,16 @@ return addField(Ty, AI->getAlign(), ForSpill, IsHeader); } + /// For each NonOverlapped Alloca Set, find the largest type allocated + /// by these allocas. Use the largest type as the field type for all of + /// the allocas in the NonOverlapped Alloca Set. + void addFieldForAllocas() { + for (auto &AllocaSet : AllocaLifetimeInfo) { + auto *LargestAI = *AllocaSet.begin(); + addFieldForAlloca(LargestAI, AllocaLifetimeInfo.getSpill(LargestAI)); + } + } + /// Add a field to this structure. FieldId addField(Type *Ty, MaybeAlign FieldAlignment, Spill *ForSpill = nullptr, @@ -496,7 +589,12 @@ F.Offset = Offset; F.FieldIndex = FieldTypes.size(); if (F.ForSpill) { - F.ForSpill->setFieldIndex(F.FieldIndex); + if (AllocaInst *AI = dyn_cast(F.ForSpill->def())) { + auto &AllocaSet = AllocaLifetimeInfo.getAllocaSet(AI); + for (auto Alloca : AllocaSet) + AllocaLifetimeInfo.getSpill(Alloca)->setFieldIndex(F.FieldIndex); + } else + F.ForSpill->setFieldIndex(F.FieldIndex); } FieldTypes.push_back(F.Ty); @@ -517,6 +615,109 @@ IsFinished = true; } +void NonOverlappedAllocaInfo::run() { + SmallVector Allocas; + const DataLayout &DL = F.getParent()->getDataLayout(); + for (auto &Spill : Spills) + if (AllocaInst *AI = dyn_cast(Spill.def())) + if (find(Allocas, AI) == Allocas.end()) { + SpillOfAllocas[AI] = &Spill; + Allocas.emplace_back(AI); + } + + if (!ReuseStorageInFrame) { + for (auto Alloca : Allocas) { + AllocaIndex[Alloca] = NonOverlapedAllocas.size(); + NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); + } + return; + } + + // Because there are pathes from the lifetime.start to coro.end + // for each alloca, the liferanges for every alloca is overlaped + // in the blocks who contain coro.end and the successor blocks. 
+ // So we choose to skip there blocks when we calculates the liferange + // for each alloca. It should be reasonable since there shouldn't be uses + // in these blocks and the coroutine frame shouldn't be used outside the + // coroutine body. + // + // Note that the user of coro.suspend may not be SwitchInst. However, this + // case seems too complex to handle. And it is harmless to skip these + // patterns since it just prevend putting the allocas to live in the same + // slot. + DenseMap DefaultSuspendDest; + for (auto CoroSuspendInst : Shape.CoroSuspends) { + for (auto U : CoroSuspendInst->users()) { + if (auto *ConstSWI = dyn_cast(U)) { + auto *SWI = const_cast(ConstSWI); + DefaultSuspendDest[SWI] = SWI->getDefaultDest(); + SWI->setDefaultDest(SWI->getSuccessor(1)); + } + } + } + + StackLifetime StackLifetimeAnalyzer(F, Allocas, + StackLifetime::LivenessType::May); + StackLifetimeAnalyzer.run(); + auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { + return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( + StackLifetimeAnalyzer.getLiveRange(AI2)); + }; + auto GetAllocaSize = [&](const AllocaInst *AI) { + Optional RetSize = AI->getAllocationSizeInBits(DL); + assert(RetSize && "We can't handle scalable type now.\n"); + return RetSize.getValue(); + }; + // Put larger allocas in the front. So the larger allocas have higher + // priority to merge, which can save more space potentially. Also each + // AllocaSet would be ordered. So we can get the largest Alloca in one + // AllocaSet easily. + sort(Allocas, [&](auto Iter1, auto Iter2) { + return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); + }); + for (auto Alloca : Allocas) { + bool Merged = false; + // Try to find if the Alloca is not inferenced with any existing + // NonOverlappedAllocaSet. If it is true, insert the alloca to that + // NonOverlappedAllocaSet. + for (auto &AllocaSet : NonOverlapedAllocas) { + assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n"); + bool CouldMerge = none_of(AllocaSet, [&](auto Iter) { + return IsAllocaInferenre(Alloca, Iter); + }); + if (!CouldMerge) + continue; + AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()]; + AllocaSet.push_back(Alloca); + Merged = true; + break; + } + if (!Merged) { + AllocaIndex[Alloca] = NonOverlapedAllocas.size(); + NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); + } + } + // Recover the default target destination for each Switch statement + // reserved. + for (auto SwitchAndDefaultDest : DefaultSuspendDest) { + SwitchInst *SWI = SwitchAndDefaultDest.first; + BasicBlock *DestBB = SwitchAndDefaultDest.second; + SWI->setDefaultDest(DestBB); + } + // This Debug Info could tell us which allocas are merged into one slot. + LLVM_DEBUG(for (auto &AllocaSet + : NonOverlapedAllocas) { + if (AllocaSet.size() > 1) { + dbgs() << "In Function:" << F.getName() << "\n"; + dbgs() << "Find Union Set " + << "\n"; + dbgs() << "\tAllocas are \n"; + for (auto Alloca : AllocaSet) + dbgs() << "\t\t" << *Alloca << "\n"; + } + }); +} + // Build a struct that will keep state for an active coroutine. // struct f.frame { // ResumeFnTy ResumeFnAddr; @@ -526,7 +727,8 @@ // ... spills ... 
// }; static StructType *buildFrameType(Function &F, coro::Shape &Shape, - SpillInfo &Spills) { + SpillInfo &Spills, + PassBuilder::OptimizationLevel OptLevel) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getParent()->getDataLayout(); StructType *FrameTy = [&] { @@ -535,7 +737,7 @@ return StructType::create(C, Name); }(); - FrameTypeBuilder B(C, DL); + FrameTypeBuilder B(C, DL, F, Spills, Shape, OptLevel); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); Optional PromiseFieldId; @@ -568,9 +770,11 @@ assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); } + // Because multiple allocas may own the same field slot, + // we add allocas to field here. + B.addFieldForAllocas(); Value *CurrentDef = nullptr; - - // Create an entry for every spilled value. + // Create an entry for every spilled value which is not an AllocaInst. for (auto &S : Spills) { // We can have multiple entries in Spills for a single value, but // they should form a contiguous run. Ignore all but the first. @@ -582,9 +786,7 @@ assert(CurrentDef != PromiseAlloca && "recorded spill use of promise alloca?"); - if (auto *AI = dyn_cast(CurrentDef)) { - B.addFieldForAlloca(AI, &S); - } else { + if (!isa(CurrentDef)) { Type *Ty = CurrentDef->getType(); B.addField(Ty, None, &S); } @@ -820,7 +1022,17 @@ } } - return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices); + auto GEP = cast( + Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices)); + if (isa(Orig)) { + // If the type of GEP is not equal to the type of AllocaInst, it implies + // that the AllocaInst may be reused in the Frame slot of other + // AllocaInst. So we cast the GEP to the type of AllocaInst. + if (GEP->getResultElementType() != Orig->getType()) + return Builder.CreateBitCast(GEP, Orig->getType(), + Orig->getName() + Twine(".cast")); + } + return GEP; }; // Create a load instruction to reload the spilled value from the coroutine @@ -1704,7 +1916,8 @@ } } -void coro::buildCoroutineFrame(Function &F, Shape &Shape) { +void coro::buildCoroutineFrame(Function &F, Shape &Shape, + PassBuilder::OptimizationLevel OptLevel) { eliminateSwiftError(F, Shape); if (Shape.ABI == coro::ABI::Switch && @@ -1843,7 +2056,7 @@ LLVM_DEBUG(dump("Spills", Spills)); if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); - Shape.FrameTy = buildFrameType(F, Shape, Spills); + Shape.FrameTy = buildFrameType(F, Shape, Spills, OptLevel); Shape.FramePtr = insertSpills(Spills, Shape); lowerLocalAllocas(LocalAllocas, DeadInstructions); Index: llvm/lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- llvm/lib/Transforms/Coroutines/CoroInternal.h +++ llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -13,6 +13,7 @@ #include "CoroInstr.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Transforms/Coroutines.h" namespace llvm { @@ -242,7 +243,10 @@ void buildFrom(Function &F); }; -void buildCoroutineFrame(Function &F, Shape &Shape); +void buildCoroutineFrame(Function &F, Shape &Shape, + PassBuilder::OptimizationLevel OptLevel); + +PassBuilder::OptimizationLevel getOptLevel(unsigned level); } // End namespace coro. 
} // End namespace llvm Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1416,7 +1416,8 @@ } static coro::Shape splitCoroutine(Function &F, - SmallVectorImpl &Clones) { + SmallVectorImpl &Clones, + PassBuilder::OptimizationLevel OptLevel) { PrettyStackTraceFunction prettyStackTrace(F); // The suspend-crossing algorithm in buildCoroutineFrame get tripped @@ -1428,7 +1429,7 @@ return Shape; simplifySuspendPoints(Shape); - buildCoroutineFrame(F, Shape); + buildCoroutineFrame(F, Shape, OptLevel); replaceFrameSize(Shape); // If there are no suspend points, no split required, just remove @@ -1699,7 +1700,7 @@ F.removeFnAttr(CORO_PRESPLIT_ATTR); SmallVector Clones; - const coro::Shape Shape = splitCoroutine(F, Clones); + const coro::Shape Shape = splitCoroutine(F, Clones, OptLevel); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); } @@ -1723,11 +1724,14 @@ struct CoroSplitLegacy : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - CoroSplitLegacy() : CallGraphSCCPass(ID) { + CoroSplitLegacy( + PassBuilder::OptimizationLevel Level = PassBuilder::OptimizationLevel::O0) + : CallGraphSCCPass(ID), OptLevel(Level) { initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry()); } bool Run = false; + PassBuilder::OptimizationLevel OptLevel; // A coroutine is identified by the presence of coro.begin intrinsic, if // we don't have any, this pass has nothing to do. @@ -1776,7 +1780,7 @@ F->removeFnAttr(CORO_PRESPLIT_ATTR); SmallVector Clones; - const coro::Shape Shape = splitCoroutine(*F, Clones); + const coro::Shape Shape = splitCoroutine(*F, Clones, OptLevel); updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC); } @@ -1807,4 +1811,6 @@ "Split coroutine into a set of functions driving its state machine", false, false) -Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); } +Pass *llvm::createCoroSplitLegacyPass(PassBuilder::OptimizationLevel OptLevel) { + return new CoroSplitLegacy(OptLevel); +} Index: llvm/lib/Transforms/Coroutines/Coroutines.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -48,6 +48,21 @@ initializeCoroCleanupLegacyPass(Registry); } +PassBuilder::OptimizationLevel llvm::coro::getOptLevel(unsigned Level) { + switch (Level) { + case 0: + return PassBuilder::OptimizationLevel::O0; + case 1: + return PassBuilder::OptimizationLevel::O1; + case 2: + return PassBuilder::OptimizationLevel::O2; + case 3: + return PassBuilder::OptimizationLevel::O3; + default: + llvm_unreachable("Invalid optimization level!"); + } +} + static void addCoroutineOpt0Passes(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { PM.add(createCoroSplitLegacyPass()); @@ -69,7 +84,7 @@ static void addCoroutineSCCPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createCoroSplitLegacyPass()); + PM.add(createCoroSplitLegacyPass(coro::getOptLevel(Builder.OptLevel))); } static void addCoroutineOptimizerLastPasses(const PassManagerBuilder &Builder, Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll @@ -0,0 +1,79 @@ +; Check that we 
can handle spills of array allocas
+; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='coro-split' -S | FileCheck %s
+
+%struct.big_structure = type { [500 x i8] }
+declare void @consume(%struct.big_structure*)
+
+; Function Attrs: noinline optnone uwtable
+define i8* @f(i1 %cond) "coroutine.presplit"="1" {
+entry:
+  %data = alloca %struct.big_structure, align 1
+  %data2 = alloca %struct.big_structure, align 1
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = bitcast %struct.big_structure* %data to i8*
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %0)
+  call void @consume(%struct.big_structure* %data)
+  %suspend.value = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %suspend.value, label %coro.ret [i8 0, label %resume
+                                             i8 1, label %cleanup1]
+
+resume:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0)
+  br label %cleanup1
+
+cleanup1:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0)
+  br label %cleanup
+
+else:
+  %1 = bitcast %struct.big_structure* %data2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %1)
+  call void @consume(%struct.big_structure* %data2)
+  %suspend.value2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %suspend.value2, label %coro.ret [i8 0, label %resume2
+                                              i8 1, label %cleanup2]
+
+resume2:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1)
+  br label %cleanup2
+
+cleanup2:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+coro.ret:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @f(
+; CHECK: call i8* @malloc(i32 520)
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare double @print(double)
+declare void @free(i8*)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
\ No newline at end of file
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll
@@ -0,0 +1,77 @@
+; Tests that variables in a coroutine whose lifetime ranges do not overlap each other
+; reuse the same slot in the coroutine frame.
+; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='coro-split' -S | FileCheck %s
+%"struct.task::promise_type" = type { i8 }
+%struct.awaitable = type { i8 }
+%struct.big_structure = type { [500 x i8] }
+declare i8* @malloc(i64)
+declare void @consume(%struct.big_structure*)
+define void @a(i1 zeroext %cond) "coroutine.presplit"="1" {
+entry:
+  %__promise = alloca %"struct.task::promise_type", align 1
+  %a = alloca %struct.big_structure, align 1
+  %ref.tmp7 = alloca %struct.awaitable, align 1
+  %b = alloca %struct.big_structure, align 1
+  %ref.tmp18 = alloca %struct.awaitable, align 1
+  %0 = getelementptr inbounds %"struct.task::promise_type", %"struct.task::promise_type"* %__promise, i64 0, i32 0
+  %1 = call token @llvm.coro.id(i32 16, i8* nonnull %0, i8* bitcast (void (i1)* @a to i8*), i8* null)
+  br label %init.ready
+init.ready:
+  %2 = call noalias nonnull i8* @llvm.coro.begin(token %1, i8* null)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0)
+  br i1 %cond, label %if.then, label %if.else
+if.then:
+  %3 = getelementptr inbounds %struct.big_structure, %struct.big_structure* %a, i64 0, i32 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %3)
+  call void @consume(%struct.big_structure* nonnull %a)
+  %save = call token @llvm.coro.save(i8* null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %coro.ret [
+    i8 0, label %await.ready
+    i8 1, label %cleanup1
+  ]
+await.ready:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %3)
+  br label %cleanup1
+if.else:
+  %4 = getelementptr inbounds %struct.big_structure, %struct.big_structure* %b, i64 0, i32 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %4)
+  call void @consume(%struct.big_structure* nonnull %b)
+  %save2 = call token @llvm.coro.save(i8* null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %coro.ret [
+    i8 0, label %await2.ready
+    i8 1, label %cleanup2
+  ]
+await2.ready:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %4)
+  br label %cleanup2
+cleanup1:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %3)
+  br label %cleanup
+cleanup2:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %4)
+  br label %cleanup
+cleanup:
+  call i8* @llvm.coro.free(token %1, i8* %2)
+  br label %coro.ret
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+; CHECK-LABEL: @a.resume(
+; CHECK: %a.reload.addr{{[0-9]+}} = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr[[APositon:.*]]
+; CHECK: %b.reload.addr{{[0-9]+}} = getelementptr inbounds %a.Frame, %a.Frame* %FramePtr[[APositon]]
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
+declare i1 @llvm.coro.alloc(token) #3
+declare i64 @llvm.coro.size.i64() #5
+declare i8* @llvm.coro.begin(token, i8* writeonly) #3
+declare token @llvm.coro.save(i8*) #3
+declare i8* @llvm.coro.frame() #5
+declare i8 @llvm.coro.suspend(token, i1) #3
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+declare i1 @llvm.coro.end(i8*, i1) #3
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4
\ No newline at end of file
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll
@@ -0,0 +1,78 @@
+; Tests that variables of different type in a coroutine whose lifetime ranges do not overlap each other
+; reuse the same slot in the coroutine frame.
+; RUN: opt < %s -coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='coro-split' -S | FileCheck %s
+%"struct.task::promise_type" = type { i8 }
+%struct.awaitable = type { i8 }
+%struct.big_structure = type { [500 x i8] }
+%struct.big_structure.2 = type { [300 x i8] }
+declare i8* @malloc(i64)
+declare void @consume(%struct.big_structure*)
+declare void @consume.2(%struct.big_structure.2*)
+define void @a(i1 zeroext %cond) "coroutine.presplit"="1" {
+entry:
+  %__promise = alloca %"struct.task::promise_type", align 1
+  %a = alloca %struct.big_structure, align 1
+  %ref.tmp7 = alloca %struct.awaitable, align 1
+  %b = alloca %struct.big_structure.2, align 1
+  %ref.tmp18 = alloca %struct.awaitable, align 1
+  %0 = getelementptr inbounds %"struct.task::promise_type", %"struct.task::promise_type"* %__promise, i64 0, i32 0
+  %1 = call token @llvm.coro.id(i32 16, i8* nonnull %0, i8* bitcast (void (i1)* @a to i8*), i8* null)
+  br label %init.ready
+init.ready:
+  %2 = call noalias nonnull i8* @llvm.coro.begin(token %1, i8* null)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0)
+  br i1 %cond, label %if.then, label %if.else
+if.then:
+  %3 = getelementptr inbounds %struct.big_structure, %struct.big_structure* %a, i64 0, i32 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %3)
+  call void @consume(%struct.big_structure* nonnull %a)
+  %save = call token @llvm.coro.save(i8* null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %coro.ret [
+    i8 0, label %await.ready
+    i8 1, label %cleanup1
+  ]
+await.ready:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %3)
+  br label %cleanup1
+if.else:
+  %4 = getelementptr inbounds %struct.big_structure.2, %struct.big_structure.2* %b, i64 0, i32 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 300, i8* nonnull %4)
+  call void @consume.2(%struct.big_structure.2* nonnull %b)
+  %save2 = call token @llvm.coro.save(i8* null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %coro.ret [
+    i8 0, label %await2.ready
+    i8 1, label %cleanup2
+  ]
+await2.ready:
+  call void @llvm.lifetime.end.p0i8(i64 300, i8* nonnull %4)
+  br label %cleanup2
+cleanup1:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %3)
+  br label %cleanup
+cleanup2:
+  call void @llvm.lifetime.end.p0i8(i64 300, i8* nonnull %4)
+  br label %cleanup
+cleanup:
+  call i8* @llvm.coro.free(token %1, i8* %2)
+  br label %coro.ret
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+; CHECK-LABEL: @a.resume(
+; CHECK: %b.reload.addr = bitcast %struct.big_structure* %0 to %struct.big_structure.2*
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
+declare i1 @llvm.coro.alloc(token) #3
+declare i64 @llvm.coro.size.i64() #5
+declare i8* @llvm.coro.begin(token, i8* writeonly) #3
+declare token @llvm.coro.save(i8*) #3
+declare i8* @llvm.coro.frame() #5
+declare i8 @llvm.coro.suspend(token, i1) #3
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+declare i1 @llvm.coro.end(i8*, i1) #3
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4
\ No newline at end of file
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll
@@ -0,0 +1,79 @@
+; Check that we should not reuse alloca storage at O0.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='coro-split' -S | FileCheck %s
+
+%struct.big_structure = type { [500 x i8] }
+declare void @consume(%struct.big_structure*)
+
+; Function Attrs: noinline optnone uwtable
+define i8* @f(i1 %cond) "coroutine.presplit"="1" {
+entry:
+  %data = alloca %struct.big_structure, align 1
+  %data2 = alloca %struct.big_structure, align 1
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = bitcast %struct.big_structure* %data to i8*
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %0)
+  call void @consume(%struct.big_structure* %data)
+  %suspend.value = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %suspend.value, label %coro.ret [i8 0, label %resume
+                                             i8 1, label %cleanup1]
+
+resume:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0)
+  br label %cleanup1
+
+cleanup1:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0)
+  br label %cleanup
+
+else:
+  %1 = bitcast %struct.big_structure* %data2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %1)
+  call void @consume(%struct.big_structure* %data2)
+  %suspend.value2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %suspend.value2, label %coro.ret [i8 0, label %resume2
+                                              i8 1, label %cleanup2]
+
+resume2:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1)
+  br label %cleanup2
+
+cleanup2:
+  call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+coro.ret:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @f(
+; CHECK: call i8* @malloc(i32 1024)
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare double @print(double)
+declare void @free(i8*)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
\ No newline at end of file
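Taken together, the reuse tests pin down both sides of the behaviour. In coro-frame-reuse-alloca-00 and -01 the two 500-byte allocas are live only on disjoint then/else paths, so their live ranges never overlap; with reuse enabled (an optimization level above O0, or the hidden -reuse-storage-in-coroutine-frame flag passed to the legacy -coro-split runs) they share one frame field: test 00 checks the 520-byte allocation directly, and test 01 checks that both reloads address the same field. Test 03 runs with the O0 default, keeps one field per alloca, and checks 1024 bytes instead. Test 02 covers the mixed-type case: the shared field is typed after the larger alloca and the smaller one is reached through a bitcast of the frame GEP, which is the %b.reload.addr bitcast its CHECK line expects. As a rough picture only (field order and padding are decided by the optimized struct layout, so the byte counts come from the CHECK lines rather than from this sketch):

  // Approximate switch-ABI frame for @f in tests 00 and 03; illustrative only.
  struct f_Frame_sketch {
    void (*ResumeFn)(void *);  // 8 bytes
    void (*DestroyFn)(void *); // 8 bytes
    char Slot0[500];           // %data; with reuse, %data2 shares this slot
    // The suspend index plus padding account for the remaining bytes, giving
    // roughly 520 in total; without reuse a second 500-byte slot for %data2
    // pushes the total to the 1024 checked in test 03.
  };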