diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -625,7 +625,22 @@
 // We use a pointer use visitor to discover if there are any writes into an
 // alloca that dominates CoroBegin. If that is the case, insertSpills will copy
 // the value from the alloca into the coroutine frame spill slot corresponding
-// to that alloca.
+// to that alloca. We also collect any alias pointing to the alloca created
+// before CoroBegin but used after CoroBegin. These aliases will be recreated
+// after CoroBegin from the frame address so that later references are
+// pointing to the frame instead of the stack.
+// Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the
+// pointer is potentially written into.
+// TODO: If the pointer is really escaped, we are in big trouble because we
+// will be escaping a pointer to a stack address that would no longer exist
+// soon. However most escape analysis isn't good enough to precisely tell,
+// so we are assuming that if a pointer is escaped, it's written into.
+// TODO: Another potential issue is if we are creating an alias through
+// a function call, e.g:
+// %a = AllocaInst ...
+// %b = call @computeAddress(... %a)
+// If %b is an alias of %a and will be used after CoroBegin, this will be broken
+// and there is nothing we can do about it.
 namespace {
 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
   using Base = PtrUseVisitor<AllocaUseVisitor>;
@@ -633,49 +648,83 @@
                    const CoroBeginInst &CB)
       : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
 
-  // We are only interested in uses that dominate coro.begin.
+  // We are only interested in uses that are not dominated by coro.begin.
   void visit(Instruction &I) {
-    if (DT.dominates(&I, &CoroBegin))
+    if (!DT.dominates(&CoroBegin, &I))
       Base::visit(I);
   }
   // We need to provide this overload as PtrUseVisitor uses a pointer based
   // visiting function.
   void visit(Instruction *I) { return visit(*I); }
 
-  void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
+  // We cannot handle PHI node and SelectInst because they could be selecting
+  // between two addresses that point to different Allocas.
+  void visitPHINode(PHINode &I) {
+    assert(!usedAfterCoroBegin(I) &&
+           "Unable to handle PHI node of aliases created before CoroBegin but "
+           "used after CoroBegin");
+  }
+
+  void visitSelectInst(SelectInst &I) {
+    assert(!usedAfterCoroBegin(I) &&
+           "Unable to handle Select of aliases created before CoroBegin but "
+           "used after CoroBegin");
+  }
+
+  void visitLoadInst(LoadInst &) {}
 
   // If the use is an operand, the pointer escaped and anything can write into
   // that memory. If the use is the pointer, we are definitely writing into the
   // alloca and therefore we need to copy.
-  void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
+  void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); }
 
-  // Any other instruction that is not filtered out by PtrUseVisitor, will
-  // result in the copy.
-  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+  // All mem intrinsics modify the data.
+  void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); }
+
+  void visitBitCastInst(BitCastInst &BC) {
+    Base::visitBitCastInst(BC);
+    handleAlias(BC);
+  }
+
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    Base::visitAddrSpaceCastInst(ASC);
+    handleAlias(ASC);
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    // The base visitor will adjust Offset accordingly.
+    Base::visitGetElementPtrInst(GEPI);
+    handleAlias(GEPI);
+  }
+
+  const SmallVector<std::pair<Instruction *, APInt>, 1> &getAliases() const {
+    return Aliases;
+  }
 
 private:
   const DominatorTree &DT;
   const CoroBeginInst &CoroBegin;
+  // All aliases to the original AllocaInst that are used after CoroBegin.
+  // Each entry contains the instruction and the offset in the original Alloca.
+  SmallVector<std::pair<Instruction *, APInt>, 1> Aliases{};
+
+  bool usedAfterCoroBegin(Instruction &I) {
+    for (auto &U : I.uses())
+      if (DT.dominates(&CoroBegin, U))
+        return true;
+    return false;
+  }
+
+  void handleAlias(Instruction &I) {
+    if (!usedAfterCoroBegin(I))
+      return;
+
+    assert(IsOffsetKnown && "Can only handle alias with known offset created "
+                            "before CoroBegin and used after");
+    Aliases.emplace_back(&I, Offset);
+  }
 };
 } // namespace
 
-static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
-                                    const CoroBeginInst &CB) {
-  const DataLayout &DL = A.getModule()->getDataLayout();
-  AllocaUseVisitor Visitor(DL, DT, CB);
-  auto PtrI = Visitor.visitPtr(A);
-  if (PtrI.isEscaped() || PtrI.isAborted()) {
-    auto *PointerEscapingInstr = PtrI.getEscapingInst()
-                                     ? PtrI.getEscapingInst()
-                                     : PtrI.getAbortingInst();
-    if (PointerEscapingInstr) {
-      LLVM_DEBUG(
-          dbgs() << "AllocaInst copy was triggered by instruction: "
-                 << *PointerEscapingInstr << "\n");
-    }
-    return true;
-  }
-  return false;
-}
 
 // We need to make room to insert a spill after initial PHIs, but before
 // catchswitch instruction. Placing it before violates the requirement that
@@ -955,7 +1004,11 @@
 
     for (auto &P : Allocas) {
       AllocaInst *const A = P.first;
-      if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
+      AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB);
+      auto PtrI = Visitor.visitPtr(*A);
+      assert(!PtrI.isAborted());
+      if (PtrI.isEscaped()) {
+        // isEscaped really means potentially modified before CoroBegin.
         if (A->isArrayAllocation())
           report_fatal_error(
               "Coroutines cannot handle copying of array allocas yet");
@@ -964,6 +1017,20 @@
         auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
         Builder.CreateStore(Value, G);
       }
+      // For each alias to Alloca created before CoroBegin but used after
+      // CoroBegin, we recreate them after CoroBegin by applying the offset
+      // to the pointer in the frame.
+      for (const auto &Alias : Visitor.getAliases()) {
+        auto *FramePtr = GetFramePointer(P.second, A);
+        auto *FramePtrRaw =
+            Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
+        auto *AliasPtr = Builder.CreateGEP(
+            FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second));
+        auto *AliasPtrTyped =
+            Builder.CreateBitCast(AliasPtr, Alias.first->getType());
+        Alias.first->replaceUsesWithIf(
+            AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
+      }
     }
   }
   return FramePtr;
diff --git a/llvm/test/Transforms/Coroutines/coro-param-copy.ll b/llvm/test/Transforms/Coroutines/coro-param-copy.ll
--- a/llvm/test/Transforms/Coroutines/coro-param-copy.ll
+++ b/llvm/test/Transforms/Coroutines/coro-param-copy.ll
@@ -5,22 +5,37 @@
 define i8* @f() "coroutine.presplit"="1" {
 entry:
+  %a.addr = alloca i64 ; read-only before coro.begin
+  %a = load i64, i64* %a.addr ; cannot modify the value, don't need to copy
+
   %x.addr = alloca i64
-  call void @use(i64* %x.addr) ; might write to %x
+  call void @use(i64* %x.addr) ; uses %x.addr before coro.begin
+
   %y.addr = alloca i64
-  %y = load i64, i64* %y.addr ; cannot modify the value, don't need to copy
-  call void @print(i64 %y)
+  %y.cast = bitcast i64* %y.addr to i8* ; alias created and used after coro.begin
+
+  %z.addr = alloca i64
+  %flag = call i1 @check()
+  br i1 %flag, label %flag_true, label %flag_merge
+
+flag_true:
+  call void @use(i64* %z.addr) ; conditionally used %z.addr
+  br label %flag_merge
+flag_merge:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
-  %alloc = call i8* @myAlloc(i64 %y, i32 %size)
+  %alloc = call i8* @myAlloc(i32 %size)
   %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  call void @llvm.memset.p0i8.i32(i8* %y.cast, i8 1, i32 4, i1 false)
   %0 = call i8 @llvm.coro.suspend(token none, i1 false)
   switch i8 %0, label %suspend [i8 0, label %resume
                                 i8 1, label %cleanup]
 
 resume:
+  call void @use(i64* %a.addr)
   call void @use(i64* %x.addr)
   call void @use(i64* %y.addr)
+  call void @use(i64* %z.addr)
   br label %cleanup
 
 cleanup:
@@ -33,26 +48,36 @@
 }
 
 ; See that we added both x and y to the frame.
-; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i64, i64, i1 }
 
 ; See that all of the uses prior to coro-begin stays put.
 ; CHECK-LABEL: define i8* @f() {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: %a.addr = alloca i64
 ; CHECK-NEXT: %x.addr = alloca i64
 ; CHECK-NEXT: call void @use(i64* %x.addr)
 ; CHECK-NEXT: %y.addr = alloca i64
-; CHECK-NEXT: %y = load i64, i64* %y.addr
-; CHECK-NEXT: call void @print(i64 %y)
+; CHECK-NEXT: %z.addr = alloca i64
 
 ; See that we only copy the x as y was not modified prior to coro.begin.
-; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
-; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
-; CHECK-NEXT: %1 = load i64, i64* %x.addr
-; CHECK-NEXT: store i64 %1, i64* %0
-; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
-; CHECK-NEXT: store i1 false, i1* %index.addr1
+; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
+; The next 3 instructions are to copy data in %x.addr from stack to frame.
+; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
+; CHECK-NEXT: %1 = load i64, i64* %x.addr, align 4
+; CHECK-NEXT: store i64 %1, i64* %0, align 4
+; The next 2 instructions are to recreate %y.cast in the original IR.
+; CHECK-NEXT: %2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK-NEXT: %3 = bitcast i64* %2 to i8*
+; The next 3 instructions are to copy data in %z.addr from stack to frame.
+; CHECK-NEXT: %4 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK-NEXT: %5 = load i64, i64* %z.addr, align 4
+; CHECK-NEXT: store i64 %5, i64* %4, align 4
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %3, i8 1, i32 4, i1 false)
+; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6
+; CHECK-NEXT: store i1 false, i1* %index.addr1, align 1
 ; CHECK-NEXT: ret i8* %hdl
+
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
 declare i8 @llvm.coro.suspend(token, i1)
@@ -64,7 +89,9 @@
 declare i8* @llvm.coro.begin(token, i8*)
 declare i1 @llvm.coro.end(i8*, i1)
 
-declare noalias i8* @myAlloc(i64, i32)
-declare void @print(i64)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+
+declare noalias i8* @myAlloc(i32)
 declare void @use(i64*)
 declare void @free(i8*)
+declare i1 @check()