diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -296,20 +296,30 @@ class FrameTypeBuilder; // Mapping from the to-be-spilled value to all the users that need reload. using SpillInfo = SmallMapVector, 8>; +struct AllocaInfo { + AllocaInst *Alloca; + DenseMap> Aliases; + bool MayWriteBeforeCoroBegin; + AllocaInfo(AllocaInst *Alloca, + DenseMap> Aliases, + bool MayWriteBeforeCoroBegin) + : Alloca(Alloca), Aliases(std::move(Aliases)), + MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {} +}; struct FrameDataInfo { // All the values (that are not allocas) that needs to be spilled to the // frame. SpillInfo Spills; // Allocas contains all values defined as allocas that need to live in the // frame. - SmallVector Allocas; + SmallVector Allocas; SmallVector getAllDefs() const { SmallVector Defs; for (const auto &P : Spills) Defs.push_back(P.first); - for (auto *A : Allocas) - Defs.push_back(A); + for (const auto &A : Allocas) + Defs.push_back(A.Alloca); return Defs; } @@ -351,10 +361,11 @@ } } -static void dumpAllocas(const SmallVectorImpl &Allocas) { +static void dumpAllocas(const SmallVectorImpl &Allocas) { dbgs() << "------------- Allocas --------------\n"; - for (auto *A : Allocas) - A->dump(); + for (const auto &A : Allocas) { + A.Alloca->dump(); + } } #endif @@ -491,8 +502,8 @@ LayoutIndexUpdateStarted = true; for (auto &S : Spills) Updater(S.first); - for (auto *A : Allocas) - Updater(A); + for (const auto &A : Allocas) + Updater(A.Alloca); LayoutIndexUpdateStarted = false; } @@ -519,7 +530,8 @@ }); if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) { - for (auto *Alloca : FrameData.Allocas) { + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; AllocaIndex[Alloca] = NonOverlapedAllocas.size(); NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); } @@ -549,15 +561,22 @@ } } - StackLifetime StackLifetimeAnalyzer(F, FrameData.Allocas, + auto ExtractAllocas = [&]() { + AllocaSetType Allocas; + Allocas.reserve(FrameData.Allocas.size()); + for (const auto &A : FrameData.Allocas) + Allocas.push_back(A.Alloca); + return Allocas; + }; + StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(), StackLifetime::LivenessType::May); StackLifetimeAnalyzer.run(); auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( StackLifetimeAnalyzer.getLiveRange(AI2)); }; - auto GetAllocaSize = [&](const AllocaInst *AI) { - Optional RetSize = AI->getAllocationSizeInBits(DL); + auto GetAllocaSize = [&](const AllocaInfo &A) { + Optional RetSize = A.Alloca->getAllocationSizeInBits(DL); assert(RetSize && "We can't handle scalable type now.\n"); return RetSize.getValue(); }; @@ -565,10 +584,11 @@ // priority to merge, which can save more space potentially. Also each // AllocaSet would be ordered. So we can get the largest Alloca in one // AllocaSet easily. - sort(FrameData.Allocas, [&](auto Iter1, auto Iter2) { + sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) { return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); }); - for (auto *Alloca : FrameData.Allocas) { + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; bool Merged = false; // Try to find if the Alloca is not inferenced with any existing // NonOverlappedAllocaSet. If it is true, insert the alloca to that @@ -773,64 +793,72 @@ return FrameTy; } -// We use a pointer use visitor to discover if there are any writes into an -// alloca that dominates CoroBegin. If that is the case, insertSpills will copy -// the value from the alloca into the coroutine frame spill slot corresponding -// to that alloca. We also collect any alias pointing to the alloca created -// before CoroBegin but used after CoroBegin. These alias will be recreated -// after CoroBegin from the frame address so that latter references are -// pointing to the frame instead of the stack. -// Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the -// pointer is potentially written into. -// TODO: If the pointer is really escaped, we are in big trouble because we -// will be escaping a pointer to a stack address that would no longer exist -// soon. However most escape analysis isn't good enough to precisely tell, -// so we are assuming that if a pointer is escaped that it's written into. -// TODO: Another potential issue is if we are creating an alias through -// a function call, e.g: -// %a = AllocaInst ... -// %b = call @computeAddress(... %a) -// If %b is an alias of %a and will be used after CoroBegin, this will be broken -// and there is nothing we can do about it. +// We use a pointer use visitor to track how an alloca is being used. +// The goal is to be able to answer the following three questions: +// 1. Should this alloca be allocated on the frame instead. +// 2. Could the content of the alloca be modified prior to CoroBegn, which would +// require copying the data from alloca to the frame after CoroBegin. +// 3. Is there any alias created for this alloca prior to CoroBegin, but used +// after CoroBegin. In that case, we will need to recreate the alias after +// CoroBegin based off the frame. To answer question 1, we track two things: +// a. List of all BasicBlocks that use this alloca or any of the aliases of +// the alloca. In the end, we check if there exists any two basic blocks that +// cross suspension points. If so, this alloca must be put on the frame. b. +// Whether the alloca or any alias of the alloca is escaped at some point, +// either by storing the address somewhere, or the address is used in a +// function call that might capture. If it's ever escaped, this alloca must be +// put on the frame conservatively. +// To answer quetion 2, we track through the variable MayWriteBeforeCoroBegin. +// Whenever a potential write happens, either through a store instruction, a +// function call or any of the memory intrinsics, we check whether this +// instruction is prior to CoroBegin. To answer question 3, we track the offsets +// of all aliases created for the alloca prior to CoroBegin but used after +// CoroBegin. llvm::Optional is used to be able to represent the case when the +// offset is unknown (e.g. when you have a PHINode that takes in different +// offset values). We cannot handle unknown offsets and will assert. This is the +// potential issue left out. An ideal solution would likely require a +// significant redesign. namespace { struct AllocaUseVisitor : PtrUseVisitor { using Base = PtrUseVisitor; AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, - const CoroBeginInst &CB) - : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} + const CoroBeginInst &CB, const SuspendCrossingInfo &Checker) + : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {} - // We are only interested in uses that's not dominated by coro.begin. void visit(Instruction &I) { - if (!DT.dominates(&CoroBegin, &I)) - Base::visit(I); + UserBBs.insert(I.getParent()); + Base::visit(I); + // If the pointer is escaped prior to CoroBegin, we have to assume it would + // be written into before CoroBegin as well. + if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) { + MayWriteBeforeCoroBegin = true; + } } // We need to provide this overload as PtrUseVisitor uses a pointer based // visiting function. void visit(Instruction *I) { return visit(*I); } - // We cannot handle PHI node and SelectInst because they could be selecting - // between two addresses that point to different Allocas. void visitPHINode(PHINode &I) { - assert(!usedAfterCoroBegin(I) && - "Unable to handle PHI node of aliases created before CoroBegin but " - "used after CoroBegin"); + enqueueUsers(I); + handleAlias(I); } void visitSelectInst(SelectInst &I) { - assert(!usedAfterCoroBegin(I) && - "Unable to handle Select of aliases created before CoroBegin but " - "used after CoroBegin"); + enqueueUsers(I); + handleAlias(I); } - void visitLoadInst(LoadInst &) {} + void visitStoreInst(StoreInst &SI) { + // Base visit function will handle escape setting. + Base::visitStoreInst(SI); - // If the use is an operand, the pointer escaped and anything can write into - // that memory. If the use is the pointer, we are definitely writing into the - // alloca and therefore we need to copy. - void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); } + // Regardless whether the alias of the alloca is the value operand or the + // pointer operand, we need to assume the alloca is been written. + handleMayWrite(SI); + } // All mem intrinsics modify the data. - void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); } + void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); } void visitBitCastInst(BitCastInst &BC) { Base::visitBitCastInst(BC); @@ -848,16 +876,62 @@ handleAlias(GEPI); } - const SmallVector, 1> &getAliases() const { - return Aliases; + void visitCallBase(CallBase &CB) { + for (unsigned i = 0, e = CB.getNumArgOperands(); i < e; ++i) + if (U->get() == CB.getArgOperand(i) && !CB.doesNotCapture(i)) + PI.setEscaped(&CB); + handleMayWrite(CB); + } + + bool getShouldLiveOnFrame() const { + if (!ShouldLiveOnFrame) + ShouldLiveOnFrame = computeShouldLiveOnFrame(); + return ShouldLiveOnFrame.getValue(); + } + + bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } + + DenseMap> getAliasesCopy() const { + assert(getShouldLiveOnFrame() && "This method should only be called if the " + "alloca needs to live on the frame."); + for (const auto &P : AliasOffetMap) { + assert(P.second && "Unable to handle an alias with unknown offset."); + if (PHINode *PN = dyn_cast(P.first)) { + for (const auto &V : PN->incoming_values()) { + } + } + } + return AliasOffetMap; } private: const DominatorTree &DT; const CoroBeginInst &CoroBegin; + const SuspendCrossingInfo &Checker; // All alias to the original AllocaInst, and are used after CoroBegin. // Each entry contains the instruction and the offset in the original Alloca. - SmallVector, 1> Aliases{}; + DenseMap> AliasOffetMap{}; + SmallPtrSet UserBBs{}; + bool MayWriteBeforeCoroBegin{false}; + + mutable llvm::Optional ShouldLiveOnFrame{}; + + bool computeShouldLiveOnFrame() const { + if (PI.isEscaped()) + return true; + + for (auto *BB1 : UserBBs) + for (auto *BB2 : UserBBs) + if (Checker.hasPathCrossingSuspendPoint(BB1, BB2)) + return true; + + return false; + } + + void handleMayWrite(const Instruction &I) { + if (!DT.dominates(&CoroBegin, &I)) + MayWriteBeforeCoroBegin = true; + } bool usedAfterCoroBegin(Instruction &I) { for (auto &U : I.uses()) @@ -867,12 +941,24 @@ } void handleAlias(Instruction &I) { - if (!usedAfterCoroBegin(I)) + // We track all aliases created prior to CoroBegin but used after. + // These aliases may need to be recreated after CoroBegin if the alloca + // need to live on the frame. + if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I)) return; - assert(IsOffsetKnown && "Can only handle alias with known offset created " - "before CoroBegin and used after"); - Aliases.emplace_back(&I, Offset); + if (!IsOffsetKnown) { + AliasOffetMap[&I] = {}; + } else { + auto Itr = AliasOffetMap.find(&I); + if (Itr == AliasOffetMap.end()) { + AliasOffetMap[&I] = Offset; + } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) { + // If we have seen two different possible values for this alias, we set + // it to empty. + AliasOffetMap[&I] = {}; + } + } } }; } // namespace @@ -1063,13 +1149,14 @@ // If we found any allocas, replace all of their remaining uses with Geps. Builder.SetInsertPoint(&SpillBlock->front()); for (const auto &P : FrameData.Allocas) { - auto *G = GetFramePointer(P); + AllocaInst *Alloca = P.Alloca; + auto *G = GetFramePointer(Alloca); // We are not using ReplaceInstWithInst(P.first, cast(G)) // here, as we are changing location of the instruction. - G->takeName(P); - P->replaceAllUsesWith(G); - P->eraseFromParent(); + G->takeName(Alloca); + Alloca->replaceAllUsesWith(G); + Alloca->eraseFromParent(); } return FramePtr; } @@ -1079,71 +1166,61 @@ // we also delete the original dbg.declare and replace other uses with undef. // Note: We cannot replace the alloca with GEP instructions indiscriminately, // as some of the uses may not be dominated by CoroBegin. - bool MightNeedToCopy = false; Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); SmallVector UsersToUpdate; - for (AllocaInst *A : FrameData.Allocas) { + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; UsersToUpdate.clear(); - for (User *U : A->users()) { + for (User *U : Alloca->users()) { auto *I = cast(U); if (DT.dominates(CB, I)) UsersToUpdate.push_back(I); - else - MightNeedToCopy = true; } - if (!UsersToUpdate.empty()) { - auto *G = GetFramePointer(A); - G->setName(A->getName() + Twine(".reload.addr")); - TinyPtrVector DIs = FindDbgDeclareUses(A); - if (!DIs.empty()) - DIBuilder(*A->getModule(), - /*AllowUnresolved*/ false) - .insertDeclare(G, DIs.front()->getVariable(), - DIs.front()->getExpression(), - DIs.front()->getDebugLoc(), DIs.front()); - for (auto *DI : FindDbgDeclareUses(A)) - DI->eraseFromParent(); - replaceDbgUsesWithUndef(A); - - for (Instruction *I : UsersToUpdate) - I->replaceUsesOfWith(A, G); + if (UsersToUpdate.empty()) + continue; + auto *G = GetFramePointer(Alloca); + G->setName(Alloca->getName() + Twine(".reload.addr")); + TinyPtrVector DIs = FindDbgDeclareUses(Alloca); + if (!DIs.empty()) + DIBuilder(*Alloca->getModule(), + /*AllowUnresolved*/ false) + .insertDeclare(G, DIs.front()->getVariable(), + DIs.front()->getExpression(), + DIs.front()->getDebugLoc(), DIs.front()); + for (auto *DI : FindDbgDeclareUses(Alloca)) + DI->eraseFromParent(); + replaceDbgUsesWithUndef(Alloca); + + for (Instruction *I : UsersToUpdate) + I->replaceUsesOfWith(Alloca, G); + } + Builder.SetInsertPoint(FramePtr->getNextNode()); + for (const auto &A : FrameData.Allocas) { + AllocaInst *Alloca = A.Alloca; + if (A.MayWriteBeforeCoroBegin) { + // isEscaped really means potentially modified before CoroBegin. + if (Alloca->isArrayAllocation()) + report_fatal_error( + "Coroutines cannot handle copying of array allocas yet"); + + auto *G = GetFramePointer(Alloca); + auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca); + Builder.CreateStore(Value, G); } - } - // If we discovered such uses not dominated by CoroBegin, see if any of them - // preceed coro begin and have instructions that can modify the - // value of the alloca and therefore would require a copying the value into - // the spill slot in the coroutine frame. - if (MightNeedToCopy) { - Builder.SetInsertPoint(FramePtr->getNextNode()); - - for (AllocaInst *A : FrameData.Allocas) { - AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB); - auto PtrI = Visitor.visitPtr(*A); - assert(!PtrI.isAborted()); - if (PtrI.isEscaped()) { - // isEscaped really means potentially modified before CoroBegin. - if (A->isArrayAllocation()) - report_fatal_error( - "Coroutines cannot handle copying of array allocas yet"); - - auto *G = GetFramePointer(A); - auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); - Builder.CreateStore(Value, G); - } - // For each alias to Alloca created before CoroBegin but used after - // CoroBegin, we recreate them after CoroBegin by appplying the offset - // to the pointer in the frame. - for (const auto &Alias : Visitor.getAliases()) { - auto *FramePtr = GetFramePointer(A); - auto *FramePtrRaw = - Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); - auto *AliasPtr = Builder.CreateGEP( - FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); - auto *AliasPtrTyped = - Builder.CreateBitCast(AliasPtr, Alias.first->getType()); - Alias.first->replaceUsesWithIf( - AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); - } + // For each alias to Alloca created before CoroBegin but used after + // CoroBegin, we recreate them after CoroBegin by appplying the offset + // to the pointer in the frame. + for (const auto &Alias : A.Aliases) { + auto *FramePtr = GetFramePointer(Alloca); + auto *FramePtrRaw = + Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); + auto *AliasPtr = Builder.CreateGEP( + FramePtrRaw, + ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue())); + auto *AliasPtrTyped = + Builder.CreateBitCast(AliasPtr, Alias.first->getType()); + Alias.first->replaceUsesWithIf( + AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); } } return FramePtr; @@ -1915,8 +1992,8 @@ } static void collectFrameAllocas(Function &F, coro::Shape &Shape, - SuspendCrossingInfo &Checker, - SmallVectorImpl &Allocas) { + const SuspendCrossingInfo &Checker, + SmallVectorImpl &Allocas) { // Collect lifetime.start info for each alloca. using LifetimeStart = SmallPtrSet; llvm::DenseMap> LifetimeMap; @@ -1944,24 +2021,31 @@ if (AI == Shape.SwitchLowering.PromiseAlloca) { continue; } + bool ShouldLiveOnFrame = false; auto Iter = LifetimeMap.find(AI); - for (User *U : I.users()) { - bool ShouldLiveOnFrame = false; - + if (Iter != LifetimeMap.end()) { // Check against lifetime.start if the instruction has the info. - if (Iter != LifetimeMap.end()) - for (auto *S : *Iter->second) { + for (User *U : I.users()) { + for (auto *S : *Iter->second) if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) break; - } - else - ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(I, U); - - if (ShouldLiveOnFrame) { - Allocas.push_back(AI); - break; + if (ShouldLiveOnFrame) + break; } + if (!ShouldLiveOnFrame) + continue; } + // At this point, either ShouldLiveOnFrame is true or we didn't have + // lifetime information. We will need to rely on more precise pointer + // tracking. + DominatorTree DT(F); + AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT, + *Shape.CoroBegin, Checker}; + Visitor.visitPtr(*AI); + if (!Visitor.getShouldLiveOnFrame()) + continue; + Allocas.emplace_back(AI, Visitor.getAliasesCopy(), + Visitor.getMayWriteBeforeCoroBegin()); } } @@ -2085,7 +2169,11 @@ Shape.FrameTy = buildFrameType(F, Shape, FrameData); // Add PromiseAlloca to Allocas list so that it is processed in insertSpills. if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) - FrameData.Allocas.push_back(Shape.SwitchLowering.PromiseAlloca); + // TODO: We assume that the promise alloca won't be modified before + // CoroBegin and no alias will be create before CoroBegin. + FrameData.Allocas.emplace_back( + Shape.SwitchLowering.PromiseAlloca, + DenseMap>{}, false); Shape.FramePtr = insertSpills(FrameData, Shape); lowerLocalAllocas(LocalAllocas, DeadInstructions); diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll @@ -0,0 +1,67 @@ +; Tests that CoroSplit can succesfully determine allocas should live on the frame +; if their aliases are used across suspension points through PHINode. +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +define i8* @f(i1 %n) "coroutine.presplit"="1" { +entry: + %x = alloca i64 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + br i1 %n, label %flag_true, label %flag_false + +flag_true: + %x.alias = bitcast i64* %x to i32* + br label %merge + +flag_false: + %y.alias = bitcast i64* %y to i32* + br label %merge + +merge: + %alias_phi = phi i32* [ %x.alias, %flag_true ], [ %y.alias, %flag_false ] + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i32* %alias_phi) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; both %x and %y, as well as %alias_phi would all go to the frame. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i32*, i1 } +; CHECK-LABEL: @f( +; CHECK: %x.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK: %y.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK: %x.alias = bitcast i64* %x.reload.addr to i32* +; CHECK: %y.alias = bitcast i64* %y.reload.addr to i32* +; CHECK: %alias_phi = select i1 %n, i32* %x.alias, i32* %y.alias +; CHECK: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: store i32* %alias_phi, i32** %alias_phi.spill.addr, align 8 + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @print(i32*) +declare noalias i8* @malloc(i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-02.ll b/llvm/test/Transforms/Coroutines/coro-alloca-02.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-02.ll @@ -0,0 +1,54 @@ +; Tests that if an alloca is escaped through storing the address, +; the alloac will be put on the frame. +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i64 + %y = alloca i32* + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %x.alias = bitcast i64* %x to i32* + store i32* %x.alias, i32** %y + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %x1 = load i32*, i32** %y + call void @print(i32* %x1) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i32*, i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %x.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK: %y.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK: %x.alias = bitcast i64* %x.reload.addr to i32* +; CHECK: store i32* %x.alias, i32** %y.reload.addr, align 8 + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @print(i32*) +declare noalias i8* @malloc(i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-03.ll b/llvm/test/Transforms/Coroutines/coro-alloca-03.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-03.ll @@ -0,0 +1,57 @@ +; Tests that allocas escaped through function calls will live on the frame. +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i64 + %y = alloca i64 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %x.alias = bitcast i64* %x to i32* + call void @capture_call(i32* %x.alias) + %y.alias = bitcast i64* %y to i32* + call void @nocapture_call(i32* %y.alias) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1 } +; CHECK-LABEL: define i8* @f() +; CHECK: %y = alloca i64, align 8 +; CHECK: %x.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK: %x.alias = bitcast i64* %x.reload.addr to i32* +; CHECK: call void @capture_call(i32* %x.alias) +; CHECK: %y.alias = bitcast i64* %y to i32* +; CHECK: call void @nocapture_call(i32* %y.alias) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @capture_call(i32*) +declare void @nocapture_call(i32* nocapture) +declare noalias i8* @malloc(i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll @@ -0,0 +1,65 @@ +; Tests that CoroSplit can succesfully determine allocas should live on the frame +; if their aliases are used across suspension points through PHINode. +; RUN: opt < %s -coro-split -S | FileCheck %s +; RUN: opt < %s -passes=coro-split -S | FileCheck %s + +define i8* @f(i1 %n) "coroutine.presplit"="1" { +entry: + %x = alloca i64 + br i1 %n, label %flag_true, label %flag_false + +flag_true: + %x.alias1 = bitcast i64* %x to i32* + br label %merge + +flag_false: + %x.alias2 = bitcast i64* %x to i32* + br label %merge + +merge: + %alias_phi = phi i32* [ %x.alias1, %flag_true ], [ %x.alias2, %flag_false ] + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i32* %alias_phi) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; both %x and %alias_phi would go to the frame. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i32*, i1 } +; CHECK-LABEL: @f( +; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr +; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 +; CHECK-NEXT: %1 = bitcast i64* %0 to i8* +; CHECK-NEXT: %2 = bitcast i8* %1 to i32* +; CHECK-NEXT: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK-NEXT: store i32* %2, i32** %alias_phi.spill.addr + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @print(i32*) +declare noalias i8* @malloc(i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll b/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll --- a/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll @@ -26,21 +26,21 @@ ; ; CHECK-LABEL: define void @f() { ; CHECK: entry: +; CHECK: %j = alloca i32, align 4 ; CHECK: [[IGEP:%.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 -; CHECK: [[JGEP:%.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 ; CHECK: init.ready: ; CHECK: call void @llvm.dbg.declare(metadata i32* [[IGEP]], metadata ![[IVAR:[0-9]+]], metadata !DIExpression()), !dbg ![[IDBGLOC:[0-9]+]] ; CHECK: await.ready: -; CHECK: call void @llvm.dbg.declare(metadata i32* [[JGEP]], metadata ![[JVAR:[0-9]+]], metadata !DIExpression()), !dbg ![[JDBGLOC:[0-9]+]] +; CHECK: call void @llvm.dbg.declare(metadata i32* %j, metadata ![[JVAR:[0-9]+]], metadata !DIExpression()), !dbg ![[JDBGLOC:[0-9]+]] ; ; CHECK-LABEL: define internal fastcc void @f.resume({{.*}}) { ; CHECK: entry.resume: +; CHECK: %j = alloca i32, align 4 ; CHECK: [[IGEP_RESUME:%.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 -; CHECK: [[JGEP_RESUME:%.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 ; CHECK: init.ready: ; CHECK: call void @llvm.dbg.declare(metadata i32* [[IGEP_RESUME]], metadata ![[IVAR_RESUME:[0-9]+]], metadata !DIExpression()), !dbg ![[IDBGLOC_RESUME:[0-9]+]] ; CHECK: await.ready: -; CHECK: call void @llvm.dbg.declare(metadata i32* [[JGEP_RESUME]], metadata ![[JVAR_RESUME:[0-9]+]], metadata !DIExpression()), !dbg ![[JDBGLOC_RESUME:[0-9]+]] +; CHECK: call void @llvm.dbg.declare(metadata i32* %j, metadata ![[JVAR_RESUME:[0-9]+]], metadata !DIExpression()), !dbg ![[JDBGLOC_RESUME:[0-9]+]] ; ; CHECK: ![[IVAR]] = !DILocalVariable(name: "i" ; CHECK: ![[SCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: !8, file: !1, line: 23, column: 12)