llvm/lib/Transforms/Coroutines/CoroFrame.cpp
do {
} | } | ||||
} while (Changed); | } while (Changed); | ||||
LLVM_DEBUG(dump()); | LLVM_DEBUG(dump()); | ||||
} | } | ||||
#undef DEBUG_TYPE // "coro-suspend-crossing" | #undef DEBUG_TYPE // "coro-suspend-crossing" | ||||
#define DEBUG_TYPE "coro-frame" | #define DEBUG_TYPE "coro-frame" | ||||
// We build up the list of spills for every case where a use is separated | |||||
// from the definition by a suspend point. | |||||
static const unsigned InvalidFieldIndex = ~0U; | |||||
namespace { | namespace { | ||||
class Spill { | class FrameTypeBuilder; | ||||
Value *Def = nullptr; | // Mapping from the to-be-spilled value to all the users that need reload. | ||||
Instruction *User = nullptr; | using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>; | ||||
unsigned FieldNo = InvalidFieldIndex; | struct FrameDataInfo { | ||||
// All the values (that are not allocas) that needs to be spilled to the | |||||
public: | // frame. | ||||
Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {} | SpillInfo Spills; | ||||
// Allocas contains all values defined as allocas that need to live in the | |||||
// frame. | |||||
SmallVector<AllocaInst *, 8> Allocas; | |||||
Value *def() const { return Def; } | SmallVector<Value *, 8> getAllDefs() const { | ||||
Instruction *user() const { return User; } | SmallVector<Value *, 8> Defs; | ||||
BasicBlock *userBlock() const { return User->getParent(); } | for (const auto &P : Spills) | ||||
Defs.push_back(P.first); | |||||
// Note that field index is stored in the first SpillEntry for a particular | for (auto *A : Allocas) | ||||
// definition. Subsequent mentions of a defintion do not have fieldNo | Defs.push_back(A); | ||||
// assigned. This works out fine as the users of Spills capture the info about | return Defs; | ||||
// the definition the first time they encounter it. Consider refactoring | } | ||||
// SpillInfo into two arrays to normalize the spill representation. | |||||
unsigned fieldIndex() const { | uint32_t getFieldIndex(Value *V) const { | ||||
assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field"); | auto Itr = FieldIndexMap.find(V); | ||||
return FieldNo; | assert(Itr != FieldIndexMap.end() && | ||||
} | "Value does not have a frame field index"); | ||||
void setFieldIndex(unsigned FieldNumber) { | return Itr->second; | ||||
assert(FieldNo == InvalidFieldIndex && "Reassigning field number"); | } | ||||
FieldNo = FieldNumber; | |||||
void setFieldIndex(Value *V, uint32_t Index) { | |||||
assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) && | |||||
"Cannot set the index for the same field twice."); | |||||
FieldIndexMap[V] = Index; | |||||
} | } | ||||
// Remap the index of every field in the frame, using the final layout index. | |||||
void updateLayoutIndex(FrameTypeBuilder &B); | |||||
private: | |||||
// LayoutIndexUpdateStarted is used to avoid updating the index of any field | |||||
// twice by mistake. | |||||
bool LayoutIndexUpdateStarted = false; | |||||
// Map from values to their slot indexes on the frame. They will be first set | |||||
// with their original insertion field index. After the frame is built, their | |||||
// indexes will be updated into the final layout index. | |||||
DenseMap<Value *, uint32_t> FieldIndexMap; | |||||
}; | }; | ||||
} // namespace | } // namespace | ||||
// Note that there may be more than one record with the same value of Def in | |||||
// the SpillInfo vector. | |||||
using SpillInfo = SmallVector<Spill, 8>; | |||||
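A minimal, self-contained sketch (editorial illustration only, not part of the patch) of how the new FrameDataInfo bookkeeping is meant to be used by the code that follows: each spilled value is mapped to the users that must reload it, setFieldIndex() first records the builder's insertion index, and updateLayoutIndex() later remaps it to the final layout index once the frame layout is finished. ToyFrameData and the plain standard-library types are stand-ins for the LLVM types.

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Simplified analogue of FrameDataInfo; strings stand in for llvm::Value*.
struct ToyFrameData {
  // Spilled value -> users that need a reload (mirrors SpillInfo).
  std::map<std::string, std::vector<std::string>> Spills;
  // Value -> field index; holds insertion indexes until remapped.
  std::map<std::string, unsigned> FieldIndexMap;

  void setFieldIndex(const std::string &V, unsigned Index) {
    FieldIndexMap[V] = Index;
  }
  unsigned getFieldIndex(const std::string &V) const {
    auto It = FieldIndexMap.find(V);
    assert(It != FieldIndexMap.end() && "value has no frame field index");
    return It->second;
  }
  // After the frame layout is final, remap insertion index -> layout index.
  void updateLayoutIndex(const std::vector<unsigned> &LayoutIndexOf) {
    for (auto &KV : FieldIndexMap)
      KV.second = LayoutIndexOf[KV.second];
  }
};

int main() {
  ToyFrameData FD;
  FD.Spills["x"].push_back("use of x after a suspend point");
  FD.setFieldIndex("x", 2);        // insertion index handed out by the builder
  FD.updateLayoutIndex({4, 3, 5}); // insertion index 2 lands at layout index 5
  assert(FD.getFieldIndex("x") == 5);
  return 0;
}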
#ifndef NDEBUG | #ifndef NDEBUG | ||||
static void dump(StringRef Title, SpillInfo const &Spills) { | static void dumpSpills(StringRef Title, const SpillInfo &Spills) { | ||||
dbgs() << "------------- " << Title << "--------------\n"; | dbgs() << "------------- " << Title << "--------------\n"; | ||||
Value *CurrentValue = nullptr; | for (const auto &E : Spills) { | ||||
for (auto const &E : Spills) { | E.first->dump(); | ||||
if (CurrentValue != E.def()) { | |||||
CurrentValue = E.def(); | |||||
CurrentValue->dump(); | |||||
} | |||||
dbgs() << " user: "; | dbgs() << " user: "; | ||||
E.user()->dump(); | for (auto *I : E.second) | ||||
I->dump(); | |||||
} | } | ||||
} | } | ||||
static void dumpAllocas(const SmallVectorImpl<AllocaInst *> &Allocas) { | |||||
dbgs() << "------------- Allocas --------------\n"; | |||||
for (auto *A : Allocas) | |||||
A->dump(); | |||||
} | |||||
#endif | #endif | ||||
namespace { | namespace { | ||||
using FieldIDType = size_t; | |||||
// We cannot rely solely on natural alignment of a type when building a | // We cannot rely solely on natural alignment of a type when building a | ||||
// coroutine frame and if the alignment specified on the Alloca instruction | // coroutine frame and if the alignment specified on the Alloca instruction | ||||
// differs from the natural alignment of the alloca type we will need to insert | // differs from the natural alignment of the alloca type we will need to insert | ||||
// padding. | // padding. | ||||
class FrameTypeBuilder { | class FrameTypeBuilder { | ||||
public: | |||||
using ForSpillType = SmallVector<Spill *, 8>; | |||||
private: | private: | ||||
struct Field { | struct Field { | ||||
uint64_t Size; | uint64_t Size; | ||||
uint64_t Offset; | uint64_t Offset; | ||||
ForSpillType ForSpill; | |||||
Type *Ty; | Type *Ty; | ||||
unsigned FieldIndex; | FieldIDType LayoutFieldIndex; | ||||
Align Alignment; | Align Alignment; | ||||
Align TyAlignment; | Align TyAlignment; | ||||
}; | }; | ||||
const DataLayout &DL; | const DataLayout &DL; | ||||
LLVMContext &Context; | LLVMContext &Context; | ||||
uint64_t StructSize = 0; | uint64_t StructSize = 0; | ||||
Align StructAlign; | Align StructAlign; | ||||
bool IsFinished = false; | bool IsFinished = false; | ||||
SmallVector<Field, 8> Fields; | SmallVector<Field, 8> Fields; | ||||
DenseMap<Value*, unsigned> FieldIndexByKey; | DenseMap<Value*, unsigned> FieldIndexByKey; | ||||
public: | public: | ||||
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) | FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) | ||||
: DL(DL), Context(Context) {} | : DL(DL), Context(Context) {} | ||||
class FieldId { | |||||
size_t Value; | |||||
explicit FieldId(size_t Value) : Value(Value) {} | |||||
friend class FrameTypeBuilder; | |||||
}; | |||||
/// Add a field to this structure for the storage of an `alloca` | /// Add a field to this structure for the storage of an `alloca` | ||||
/// instruction. | /// instruction. | ||||
FieldId addFieldForAlloca(AllocaInst *AI, ForSpillType ForSpill = {}, | LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI, | ||||
bool IsHeader = false) { | bool IsHeader = false) { | ||||
Type *Ty = AI->getAllocatedType(); | Type *Ty = AI->getAllocatedType(); | ||||
// Make an array type if this is a static array allocation. | // Make an array type if this is a static array allocation. | ||||
if (AI->isArrayAllocation()) { | if (AI->isArrayAllocation()) { | ||||
if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) | if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) | ||||
Ty = ArrayType::get(Ty, CI->getValue().getZExtValue()); | Ty = ArrayType::get(Ty, CI->getValue().getZExtValue()); | ||||
else | else | ||||
report_fatal_error("Coroutines cannot handle non static allocas yet"); | report_fatal_error("Coroutines cannot handle non static allocas yet"); | ||||
} | } | ||||
return addField(Ty, AI->getAlign(), ForSpill, IsHeader); | return addField(Ty, AI->getAlign(), IsHeader); | ||||
} | } | ||||
/// We want to put the allocas whose lifetime-ranges are not overlapped | /// We want to put the allocas whose lifetime-ranges are not overlapped | ||||
/// into one slot of coroutine frame. | /// into one slot of coroutine frame. | ||||
/// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566 | /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566 | ||||
/// | /// | ||||
/// cppcoro::task<void> alternative_paths(bool cond) { | /// cppcoro::task<void> alternative_paths(bool cond) { | ||||
/// if (cond) { | /// if (cond) { | ||||
public:
/// This function use StackLifetime algorithm to partition the AllocaInsts in | /// This function use StackLifetime algorithm to partition the AllocaInsts in | ||||
/// Spills to non-overlapped sets in order to put Alloca in the same | /// Spills to non-overlapped sets in order to put Alloca in the same | ||||
/// non-overlapped set into the same slot in the Coroutine Frame. Then add | /// non-overlapped set into the same slot in the Coroutine Frame. Then add | ||||
/// field for the allocas in the same non-overlapped set by using the largest | /// field for the allocas in the same non-overlapped set by using the largest | ||||
/// type as the field type. | /// type as the field type. | ||||
/// | /// | ||||
/// Side Effects: Because We sort the allocas, the order of allocas in the | /// Side Effects: Because We sort the allocas, the order of allocas in the | ||||
/// frame may be different with the order in the source code. | /// frame may be different with the order in the source code. | ||||
void addFieldForAllocas(const Function &F, SpillInfo &Spills, | void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, | ||||
coro::Shape &Shape); | coro::Shape &Shape); | ||||
/// Add a field to this structure. | /// Add a field to this structure. | ||||
FieldId addField(Type *Ty, MaybeAlign FieldAlignment, | LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, | ||||
ForSpillType ForSpill = {}, bool IsHeader = false) { | bool IsHeader = false) { | ||||
assert(!IsFinished && "adding fields to a finished builder"); | assert(!IsFinished && "adding fields to a finished builder"); | ||||
assert(Ty && "must provide a type for a field"); | assert(Ty && "must provide a type for a field"); | ||||
// The field size is always the alloc size of the type. | // The field size is always the alloc size of the type. | ||||
uint64_t FieldSize = DL.getTypeAllocSize(Ty); | uint64_t FieldSize = DL.getTypeAllocSize(Ty); | ||||
// The field alignment might not be the type alignment, but we need | // The field alignment might not be the type alignment, but we need | ||||
// to remember the type alignment anyway to build the type. | // to remember the type alignment anyway to build the type. | ||||
Align TyAlignment = DL.getABITypeAlign(Ty); | Align TyAlignment = DL.getABITypeAlign(Ty); | ||||
if (!FieldAlignment) FieldAlignment = TyAlignment; | if (!FieldAlignment) FieldAlignment = TyAlignment; | ||||
// Lay out header fields immediately. | // Lay out header fields immediately. | ||||
uint64_t Offset; | uint64_t Offset; | ||||
if (IsHeader) { | if (IsHeader) { | ||||
Offset = alignTo(StructSize, FieldAlignment); | Offset = alignTo(StructSize, FieldAlignment); | ||||
StructSize = Offset + FieldSize; | StructSize = Offset + FieldSize; | ||||
// Everything else has a flexible offset. | // Everything else has a flexible offset. | ||||
} else { | } else { | ||||
Offset = OptimizedStructLayoutField::FlexibleOffset; | Offset = OptimizedStructLayoutField::FlexibleOffset; | ||||
} | } | ||||
Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0, | Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment}); | ||||
*FieldAlignment, TyAlignment}); | return Fields.size() - 1; | ||||
return FieldId(Fields.size() - 1); | |||||
} | } | ||||
/// Finish the layout and set the body on the given type. | /// Finish the layout and set the body on the given type. | ||||
void finish(StructType *Ty); | void finish(StructType *Ty); | ||||
uint64_t getStructSize() const { | uint64_t getStructSize() const { | ||||
assert(IsFinished && "not yet finished!"); | assert(IsFinished && "not yet finished!"); | ||||
return StructSize; | return StructSize; | ||||
} | } | ||||
Align getStructAlign() const { | Align getStructAlign() const { | ||||
assert(IsFinished && "not yet finished!"); | assert(IsFinished && "not yet finished!"); | ||||
return StructAlign; | return StructAlign; | ||||
} | } | ||||
unsigned getFieldIndex(FieldId Id) const { | FieldIDType getLayoutFieldIndex(FieldIDType Id) const { | ||||
assert(IsFinished && "not yet finished!"); | assert(IsFinished && "not yet finished!"); | ||||
return Fields[Id.Value].FieldIndex; | return Fields[Id].LayoutFieldIndex; | ||||
} | } | ||||
}; | }; | ||||
} // namespace | } // namespace | ||||
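Before the definitions that follow, a minimal stand-alone sketch (editorial illustration, not part of the patch) of the slot-reuse strategy addFieldForAllocas() implements with StackLifetime: allocas are sorted largest-first and greedily merged into the first set whose live ranges they do not overlap, and each set then shares one frame field sized by its largest member. ToyAlloca and the interval live ranges are simplifications standing in for AllocaInst and StackLifetime results.

#include <algorithm>
#include <cstdint>
#include <vector>

struct ToyAlloca {
  uint64_t Size;
  uint64_t Begin, End; // half-open live range [Begin, End)
};

static bool overlaps(const ToyAlloca &A, const ToyAlloca &B) {
  return A.Begin < B.End && B.Begin < A.End;
}

// Partition allocas into sets whose live ranges are pairwise non-overlapping;
// every set can then share a single frame slot.
static std::vector<std::vector<ToyAlloca>>
partitionForSlotReuse(std::vector<ToyAlloca> Allocas) {
  // Larger allocas first, so the first element of each set is its largest.
  std::sort(Allocas.begin(), Allocas.end(),
            [](const ToyAlloca &L, const ToyAlloca &R) { return L.Size > R.Size; });
  std::vector<std::vector<ToyAlloca>> Sets;
  for (const ToyAlloca &A : Allocas) {
    bool Merged = false;
    for (auto &Set : Sets) {
      bool CouldMerge = std::none_of(
          Set.begin(), Set.end(),
          [&](const ToyAlloca &B) { return overlaps(A, B); });
      if (!CouldMerge)
        continue;
      Set.push_back(A);
      Merged = true;
      break;
    }
    if (!Merged)
      Sets.push_back({A});
  }
  return Sets;
}

When frame-slot reuse is disabled (the !Shape.ReuseFrameSlot && !EnableReuseStorageInFrame early return below), every alloca simply gets its own singleton set, which corresponds to partitioning without any merging.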
void FrameTypeBuilder::addFieldForAllocas(const Function &F, SpillInfo &Spills, | void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) { | ||||
auto Updater = [&](Value *I) { | |||||
setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I))); | |||||
}; | |||||
LayoutIndexUpdateStarted = true; | |||||
for (auto &S : Spills) | |||||
Updater(S.first); | |||||
for (auto *A : Allocas) | |||||
Updater(A); | |||||
LayoutIndexUpdateStarted = false; | |||||
} | |||||
void FrameTypeBuilder::addFieldForAllocas(const Function &F, | |||||
FrameDataInfo &FrameData, | |||||
coro::Shape &Shape) { | coro::Shape &Shape) { | ||||
DenseMap<AllocaInst *, unsigned int> AllocaIndex; | DenseMap<AllocaInst *, unsigned int> AllocaIndex; | ||||
SmallVector<AllocaInst *, 8> Allocas; | |||||
DenseMap<AllocaInst *, Spill *> SpillOfAllocas; | |||||
using AllocaSetType = SmallVector<AllocaInst *, 4>; | using AllocaSetType = SmallVector<AllocaInst *, 4>; | ||||
SmallVector<AllocaSetType, 4> NonOverlapedAllocas; | SmallVector<AllocaSetType, 4> NonOverlapedAllocas; | ||||
// We need to add field for allocas at the end of this function. However, this | // We need to add field for allocas at the end of this function. However, this | ||||
// function has multiple exits, so we use this helper to avoid redundant code. | // function has multiple exits, so we use this helper to avoid redundant code. | ||||
struct RTTIHelper { | struct RTTIHelper { | ||||
std::function<void()> func; | std::function<void()> func; | ||||
RTTIHelper(std::function<void()> &&func) : func(func) {} | RTTIHelper(std::function<void()> &&func) : func(func) {} | ||||
~RTTIHelper() { func(); } | ~RTTIHelper() { func(); } | ||||
} Helper([&]() { | } Helper([&]() { | ||||
for (auto AllocaSet : NonOverlapedAllocas) { | for (auto AllocaList : NonOverlapedAllocas) { | ||||
ForSpillType ForSpills; | auto *LargestAI = *AllocaList.begin(); | ||||
for (auto Alloca : AllocaSet) | FieldIDType Id = addFieldForAlloca(LargestAI); | ||||
ForSpills.push_back(SpillOfAllocas[Alloca]); | for (auto *Alloca : AllocaList) | ||||
auto *LargestAI = *AllocaSet.begin(); | FrameData.setFieldIndex(Alloca, Id); | ||||
addFieldForAlloca(LargestAI, ForSpills); | |||||
} | } | ||||
}); | }); | ||||
for (auto &Spill : Spills) | |||||
if (AllocaInst *AI = dyn_cast<AllocaInst>(Spill.def())) | |||||
if (find(Allocas, AI) == Allocas.end()) { | |||||
SpillOfAllocas[AI] = &Spill; | |||||
Allocas.emplace_back(AI); | |||||
} | |||||
if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) { | if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) { | ||||
for (auto Alloca : Allocas) { | for (auto *Alloca : FrameData.Allocas) { | ||||
AllocaIndex[Alloca] = NonOverlapedAllocas.size(); | AllocaIndex[Alloca] = NonOverlapedAllocas.size(); | ||||
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); | NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); | ||||
} | } | ||||
return; | return; | ||||
} | } | ||||
// Because there are pathes from the lifetime.start to coro.end | // Because there are pathes from the lifetime.start to coro.end | ||||
// for each alloca, the liferanges for every alloca is overlaped | // for each alloca, the liferanges for every alloca is overlaped | ||||
Show All 13 Lines | for (auto U : CoroSuspendInst->users()) { | ||||
if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) { | if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) { | ||||
auto *SWI = const_cast<SwitchInst *>(ConstSWI); | auto *SWI = const_cast<SwitchInst *>(ConstSWI); | ||||
DefaultSuspendDest[SWI] = SWI->getDefaultDest(); | DefaultSuspendDest[SWI] = SWI->getDefaultDest(); | ||||
SWI->setDefaultDest(SWI->getSuccessor(1)); | SWI->setDefaultDest(SWI->getSuccessor(1)); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
StackLifetime StackLifetimeAnalyzer(F, Allocas, | StackLifetime StackLifetimeAnalyzer(F, FrameData.Allocas, | ||||
StackLifetime::LivenessType::May); | StackLifetime::LivenessType::May); | ||||
StackLifetimeAnalyzer.run(); | StackLifetimeAnalyzer.run(); | ||||
auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { | auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { | ||||
return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( | return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( | ||||
StackLifetimeAnalyzer.getLiveRange(AI2)); | StackLifetimeAnalyzer.getLiveRange(AI2)); | ||||
}; | }; | ||||
auto GetAllocaSize = [&](const AllocaInst *AI) { | auto GetAllocaSize = [&](const AllocaInst *AI) { | ||||
Optional<uint64_t> RetSize = AI->getAllocationSizeInBits(DL); | Optional<uint64_t> RetSize = AI->getAllocationSizeInBits(DL); | ||||
assert(RetSize && "We can't handle scalable type now.\n"); | assert(RetSize && "We can't handle scalable type now.\n"); | ||||
return RetSize.getValue(); | return RetSize.getValue(); | ||||
}; | }; | ||||
// Put larger allocas in the front. So the larger allocas have higher | // Put larger allocas in the front. So the larger allocas have higher | ||||
// priority to merge, which can save more space potentially. Also each | // priority to merge, which can save more space potentially. Also each | ||||
// AllocaSet would be ordered. So we can get the largest Alloca in one | // AllocaSet would be ordered. So we can get the largest Alloca in one | ||||
// AllocaSet easily. | // AllocaSet easily. | ||||
sort(Allocas, [&](auto Iter1, auto Iter2) { | sort(FrameData.Allocas, [&](auto Iter1, auto Iter2) { | ||||
return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); | return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); | ||||
}); | }); | ||||
for (auto Alloca : Allocas) { | for (auto *Alloca : FrameData.Allocas) { | ||||
bool Merged = false; | bool Merged = false; | ||||
// Try to find if the Alloca is not inferenced with any existing | // Try to find if the Alloca is not inferenced with any existing | ||||
// NonOverlappedAllocaSet. If it is true, insert the alloca to that | // NonOverlappedAllocaSet. If it is true, insert the alloca to that | ||||
// NonOverlappedAllocaSet. | // NonOverlappedAllocaSet. | ||||
for (auto &AllocaSet : NonOverlapedAllocas) { | for (auto &AllocaSet : NonOverlapedAllocas) { | ||||
assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n"); | assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n"); | ||||
bool CouldMerge = none_of(AllocaSet, [&](auto Iter) { | bool CouldMerge = none_of(AllocaSet, [&](auto Iter) { | ||||
return IsAllocaInferenre(Alloca, Iter); | return IsAllocaInferenre(Alloca, Iter); | ||||
for (auto &LayoutField : LayoutFields) {
// get from aligning to the field type's natural alignment. | // get from aligning to the field type's natural alignment. | ||||
assert(Offset >= LastOffset); | assert(Offset >= LastOffset); | ||||
if (Offset != LastOffset) { | if (Offset != LastOffset) { | ||||
if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset) | if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset) | ||||
FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context), | FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context), | ||||
Offset - LastOffset)); | Offset - LastOffset)); | ||||
} | } | ||||
// Record the layout information into both the Field and the | |||||
// original Spill, if there is one. | |||||
F.Offset = Offset; | F.Offset = Offset; | ||||
F.FieldIndex = FieldTypes.size(); | F.LayoutFieldIndex = FieldTypes.size(); | ||||
for (auto Spill : F.ForSpill) { | |||||
Spill->setFieldIndex(F.FieldIndex); | |||||
} | |||||
FieldTypes.push_back(F.Ty); | FieldTypes.push_back(F.Ty); | ||||
LastOffset = Offset + F.Size; | LastOffset = Offset + F.Size; | ||||
} | } | ||||
Ty->setBody(FieldTypes, Packed); | Ty->setBody(FieldTypes, Packed); | ||||
#ifndef NDEBUG | #ifndef NDEBUG | ||||
// Check that the IR layout matches the offsets we expect. | // Check that the IR layout matches the offsets we expect. | ||||
auto Layout = DL.getStructLayout(Ty); | auto Layout = DL.getStructLayout(Ty); | ||||
for (auto &F : Fields) { | for (auto &F : Fields) { | ||||
assert(Ty->getElementType(F.FieldIndex) == F.Ty); | assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty); | ||||
assert(Layout->getElementOffset(F.FieldIndex) == F.Offset); | assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset); | ||||
} | } | ||||
#endif | #endif | ||||
IsFinished = true; | IsFinished = true; | ||||
} | } | ||||
// Build a struct that will keep state for an active coroutine. | // Build a struct that will keep state for an active coroutine. | ||||
// struct f.frame { | // struct f.frame { | ||||
// ResumeFnTy ResumeFnAddr; | // ResumeFnTy ResumeFnAddr; | ||||
// ResumeFnTy DestroyFnAddr; | // ResumeFnTy DestroyFnAddr; | ||||
// int ResumeIndex; | // int ResumeIndex; | ||||
// ... promise (if present) ... | // ... promise (if present) ... | ||||
// ... spills ... | // ... spills ... | ||||
// }; | // }; | ||||
static StructType *buildFrameType(Function &F, coro::Shape &Shape, | static StructType *buildFrameType(Function &F, coro::Shape &Shape, | ||||
SpillInfo &Spills) { | FrameDataInfo &FrameData) { | ||||
LLVMContext &C = F.getContext(); | LLVMContext &C = F.getContext(); | ||||
const DataLayout &DL = F.getParent()->getDataLayout(); | const DataLayout &DL = F.getParent()->getDataLayout(); | ||||
StructType *FrameTy = [&] { | StructType *FrameTy = [&] { | ||||
SmallString<32> Name(F.getName()); | SmallString<32> Name(F.getName()); | ||||
Name.append(".Frame"); | Name.append(".Frame"); | ||||
return StructType::create(C, Name); | return StructType::create(C, Name); | ||||
}(); | }(); | ||||
FrameTypeBuilder B(C, DL); | FrameTypeBuilder B(C, DL); | ||||
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); | AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); | ||||
Optional<FrameTypeBuilder::FieldId> PromiseFieldId; | Optional<FieldIDType> SwitchIndexFieldId; | ||||
Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId; | |||||
if (Shape.ABI == coro::ABI::Switch) { | if (Shape.ABI == coro::ABI::Switch) { | ||||
auto *FramePtrTy = FrameTy->getPointerTo(); | auto *FramePtrTy = FrameTy->getPointerTo(); | ||||
auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, | auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, | ||||
/*IsVarArg=*/false); | /*IsVarArg=*/false); | ||||
auto *FnPtrTy = FnTy->getPointerTo(); | auto *FnPtrTy = FnTy->getPointerTo(); | ||||
// Add header fields for the resume and destroy functions. | // Add header fields for the resume and destroy functions. | ||||
// We can rely on these being perfectly packed. | // We can rely on these being perfectly packed. | ||||
B.addField(FnPtrTy, None, {}, /*header*/ true); | (void)B.addField(FnPtrTy, None, /*header*/ true); | ||||
B.addField(FnPtrTy, None, {}, /*header*/ true); | (void)B.addField(FnPtrTy, None, /*header*/ true); | ||||
// Add a header field for the promise if there is one. | // PromiseAlloca field needs to be explicitly added here because it's | ||||
if (PromiseAlloca) { | // a header field with a fixed offset based on its alignment. Hence it | ||||
rjmccall: Well, it needs to be explicitly added here because it's a header field which has to have a…
PromiseFieldId = B.addFieldForAlloca(PromiseAlloca, {}, /*header*/ true); | // needs special handling and cannot be added to FrameData.Allocas. | ||||
} | if (PromiseAlloca) | ||||
FrameData.setFieldIndex( | |||||
PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true)); | |||||
// Add a field to store the suspend index. This doesn't need to | // Add a field to store the suspend index. This doesn't need to | ||||
// be in the header. | // be in the header. | ||||
unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); | unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); | ||||
Type *IndexType = Type::getIntNTy(C, IndexBits); | Type *IndexType = Type::getIntNTy(C, IndexBits); | ||||
SwitchIndexFieldId = B.addField(IndexType, None); | SwitchIndexFieldId = B.addField(IndexType, None); | ||||
} else { | } else { | ||||
assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); | assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); | ||||
} | } | ||||
// Because multiple allocas may own the same field slot, | // Because multiple allocas may own the same field slot, | ||||
// we add allocas to field here. | // we add allocas to field here. | ||||
B.addFieldForAllocas(F, Spills, Shape); | B.addFieldForAllocas(F, FrameData, Shape); | ||||
Value *CurrentDef = nullptr; | // Create an entry for every spilled value. | ||||
// Create an entry for every spilled value which is not an AllocaInst. | for (auto &S : FrameData.Spills) { | ||||
for (auto &S : Spills) { | FieldIDType Id = B.addField(S.first->getType(), None); | ||||
// We can have multiple entries in Spills for a single value, but | FrameData.setFieldIndex(S.first, Id); | ||||
// they should form a contiguous run. Ignore all but the first. | |||||
if (CurrentDef == S.def()) | |||||
continue; | |||||
CurrentDef = S.def(); | |||||
assert(CurrentDef != PromiseAlloca && | |||||
"recorded spill use of promise alloca?"); | |||||
if (!isa<AllocaInst>(CurrentDef)) { | |||||
Type *Ty = CurrentDef->getType(); | |||||
B.addField(Ty, None, {&S}); | |||||
} | |||||
} | } | ||||
B.finish(FrameTy); | B.finish(FrameTy); | ||||
FrameData.updateLayoutIndex(B); | |||||
Shape.FrameAlign = B.getStructAlign(); | Shape.FrameAlign = B.getStructAlign(); | ||||
Shape.FrameSize = B.getStructSize(); | Shape.FrameSize = B.getStructSize(); | ||||
switch (Shape.ABI) { | switch (Shape.ABI) { | ||||
// In the switch ABI, remember the field indices for the promise and | |||||
// switch-index fields. | |||||
case coro::ABI::Switch: | case coro::ABI::Switch: | ||||
// In the switch ABI, remember the switch-index field. | |||||
rjmccall: It's just "index" now.
Shape.SwitchLowering.IndexField = | Shape.SwitchLowering.IndexField = | ||||
B.getFieldIndex(*SwitchIndexFieldId); | B.getLayoutFieldIndex(*SwitchIndexFieldId); | ||||
Shape.SwitchLowering.PromiseField = | |||||
(PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0); | |||||
// Also round the frame size up to a multiple of its alignment, as is | // Also round the frame size up to a multiple of its alignment, as is | ||||
// generally expected in C/C++. | // generally expected in C/C++. | ||||
Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); | Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); | ||||
break; | break; | ||||
// In the retcon ABI, remember whether the frame is inline in the storage. | // In the retcon ABI, remember whether the frame is inline in the storage. | ||||
case coro::ABI::Retcon: | case coro::ABI::Retcon: | ||||
// AllocaSpillBB: | // AllocaSpillBB: | ||||
// ; geps corresponding to allocas that were moved to coroutine frame | // ; geps corresponding to allocas that were moved to coroutine frame | ||||
// br label PostSpill | // br label PostSpill | ||||
// | // | ||||
// PostSpill: | // PostSpill: | ||||
// whatever | // whatever | ||||
// | // | ||||
// | // | ||||
static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { | static Instruction *insertSpills(const FrameDataInfo &FrameData, | ||||
coro::Shape &Shape) { | |||||
auto *CB = Shape.CoroBegin; | auto *CB = Shape.CoroBegin; | ||||
LLVMContext &C = CB->getContext(); | LLVMContext &C = CB->getContext(); | ||||
IRBuilder<> Builder(CB->getNextNode()); | IRBuilder<> Builder(CB->getNextNode()); | ||||
StructType *FrameTy = Shape.FrameTy; | StructType *FrameTy = Shape.FrameTy; | ||||
PointerType *FramePtrTy = FrameTy->getPointerTo(); | PointerType *FramePtrTy = FrameTy->getPointerTo(); | ||||
auto *FramePtr = | auto *FramePtr = | ||||
cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); | cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); | ||||
DominatorTree DT(*CB->getFunction()); | DominatorTree DT(*CB->getFunction()); | ||||
Value *CurrentValue = nullptr; | |||||
BasicBlock *CurrentBlock = nullptr; | |||||
Value *CurrentReload = nullptr; | |||||
// Proper field number will be read from field definition. | |||||
unsigned Index = InvalidFieldIndex; | |||||
// We need to keep track of any allocas that need "spilling" | |||||
// since they will live in the coroutine frame now, all access to them | |||||
// need to be changed, not just the access across suspend points | |||||
// we remember allocas and their indices to be handled once we processed | |||||
// all the spills. | |||||
SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas; | |||||
// Promise alloca (if present) doesn't show in the spills and has a | |||||
// special field number. | |||||
if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { | |||||
assert(Shape.ABI == coro::ABI::Switch); | |||||
Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField()); | |||||
} | |||||
// Create a GEP with the given index into the coroutine frame for the original | // Create a GEP with the given index into the coroutine frame for the original | ||||
// value Orig. Appends an extra 0 index for array-allocas, preserving the | // value Orig. Appends an extra 0 index for array-allocas, preserving the | ||||
// original type. | // original type. | ||||
auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * { | auto GetFramePointer = [&](Value *Orig) -> Value * { | ||||
FieldIDType Index = FrameData.getFieldIndex(Orig); | |||||
SmallVector<Value *, 3> Indices = { | SmallVector<Value *, 3> Indices = { | ||||
ConstantInt::get(Type::getInt32Ty(C), 0), | ConstantInt::get(Type::getInt32Ty(C), 0), | ||||
ConstantInt::get(Type::getInt32Ty(C), Index), | ConstantInt::get(Type::getInt32Ty(C), Index), | ||||
}; | }; | ||||
if (auto *AI = dyn_cast<AllocaInst>(Orig)) { | if (auto *AI = dyn_cast<AllocaInst>(Orig)) { | ||||
if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { | if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { | ||||
auto Count = CI->getValue().getZExtValue(); | auto Count = CI->getValue().getZExtValue(); | ||||
if (isa<AllocaInst>(Orig)) {
// AllocaInst. So we cast the GEP to the type of AllocaInst. | // AllocaInst. So we cast the GEP to the type of AllocaInst. | ||||
if (GEP->getResultElementType() != Orig->getType()) | if (GEP->getResultElementType() != Orig->getType()) | ||||
return Builder.CreateBitCast(GEP, Orig->getType(), | return Builder.CreateBitCast(GEP, Orig->getType(), | ||||
Orig->getName() + Twine(".cast")); | Orig->getName() + Twine(".cast")); | ||||
} | } | ||||
return GEP; | return GEP; | ||||
}; | }; | ||||
// Create a load instruction to reload the spilled value from the coroutine | for (auto const &E : FrameData.Spills) { | ||||
// frame. Populates the Value pointer reference provided with the frame GEP. | Value *Def = E.first; | ||||
auto CreateReload = [&](Instruction *InsertBefore, Value *&G) { | // Create a store instruction storing the value into the | ||||
assert(Index != InvalidFieldIndex && "accessing unassigned field number"); | |||||
Builder.SetInsertPoint(InsertBefore); | |||||
G = GetFramePointer(Index, CurrentValue); | |||||
G->setName(CurrentValue->getName() + Twine(".reload.addr")); | |||||
return isa<AllocaInst>(CurrentValue) | |||||
? G | |||||
: Builder.CreateLoad(FrameTy->getElementType(Index), G, | |||||
CurrentValue->getName() + Twine(".reload")); | |||||
}; | |||||
Value *GEP = nullptr, *CurrentGEP = nullptr; | |||||
for (auto const &E : Spills) { | |||||
// If we have not seen the value, generate a spill. | |||||
if (CurrentValue != E.def()) { | |||||
CurrentValue = E.def(); | |||||
CurrentBlock = nullptr; | |||||
CurrentReload = nullptr; | |||||
Index = E.fieldIndex(); | |||||
if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) { | |||||
// Spilled AllocaInst will be replaced with GEP from the coroutine frame | |||||
// there is no spill required. | |||||
Allocas.emplace_back(AI, Index); | |||||
if (!AI->isStaticAlloca()) | |||||
report_fatal_error("Coroutines cannot handle non static allocas yet"); | |||||
} else { | |||||
// Otherwise, create a store instruction storing the value into the | |||||
// coroutine frame. | // coroutine frame. | ||||
Instruction *InsertPt = nullptr; | Instruction *InsertPt = nullptr; | ||||
if (auto Arg = dyn_cast<Argument>(CurrentValue)) { | if (auto *Arg = dyn_cast<Argument>(Def)) { | ||||
// For arguments, we will place the store instruction right after | // For arguments, we will place the store instruction right after | ||||
// the coroutine frame pointer instruction, i.e. bitcast of | // the coroutine frame pointer instruction, i.e. bitcast of | ||||
// coro.begin from i8* to %f.frame*. | // coro.begin from i8* to %f.frame*. | ||||
InsertPt = FramePtr->getNextNode(); | InsertPt = FramePtr->getNextNode(); | ||||
// If we're spilling an Argument, make sure we clear 'nocapture' | // If we're spilling an Argument, make sure we clear 'nocapture' | ||||
// from the coroutine function. | // from the coroutine function. | ||||
Arg->getParent()->removeParamAttr(Arg->getArgNo(), | Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); | ||||
Attribute::NoCapture); | |||||
} else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) { | } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) { | ||||
// Don't spill immediately after a suspend; splitting assumes | // Don't spill immediately after a suspend; splitting assumes | ||||
// that the suspend will be followed by a branch. | // that the suspend will be followed by a branch. | ||||
InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); | InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); | ||||
} else { | } else { | ||||
auto *I = cast<Instruction>(CurrentValue); | auto *I = cast<Instruction>(Def); | ||||
if (!DT.dominates(CB, I)) { | if (!DT.dominates(CB, I)) { | ||||
// If it is not dominated by CoroBegin, then spill should be | // If it is not dominated by CoroBegin, then spill should be | ||||
// inserted immediately after CoroFrame is computed. | // inserted immediately after CoroFrame is computed. | ||||
InsertPt = FramePtr->getNextNode(); | InsertPt = FramePtr->getNextNode(); | ||||
} else if (auto *II = dyn_cast<InvokeInst>(I)) { | } else if (auto *II = dyn_cast<InvokeInst>(I)) { | ||||
// If we are spilling the result of the invoke instruction, split | // If we are spilling the result of the invoke instruction, split | ||||
// the normal edge and insert the spill in the new block. | // the normal edge and insert the spill in the new block. | ||||
auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); | auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); | ||||
InsertPt = NewBB->getTerminator(); | InsertPt = NewBB->getTerminator(); | ||||
} else if (isa<PHINode>(I)) { | } else if (isa<PHINode>(I)) { | ||||
// Skip the PHINodes and EH pads instructions. | // Skip the PHINodes and EH pads instructions. | ||||
BasicBlock *DefBlock = I->getParent(); | BasicBlock *DefBlock = I->getParent(); | ||||
if (auto *CSI = | if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) | ||||
dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) | |||||
InsertPt = splitBeforeCatchSwitch(CSI); | InsertPt = splitBeforeCatchSwitch(CSI); | ||||
else | else | ||||
InsertPt = &*DefBlock->getFirstInsertionPt(); | InsertPt = &*DefBlock->getFirstInsertionPt(); | ||||
} else { | } else { | ||||
assert(!I->isTerminator() && "unexpected terminator"); | assert(!I->isTerminator() && "unexpected terminator"); | ||||
// For all other values, the spill is placed immediately after | // For all other values, the spill is placed immediately after | ||||
// the definition. | // the definition. | ||||
InsertPt = I->getNextNode(); | InsertPt = I->getNextNode(); | ||||
} | } | ||||
} | } | ||||
auto Index = FrameData.getFieldIndex(Def); | |||||
Builder.SetInsertPoint(InsertPt); | Builder.SetInsertPoint(InsertPt); | ||||
auto *G = Builder.CreateConstInBoundsGEP2_32( | auto *G = Builder.CreateConstInBoundsGEP2_32( | ||||
FrameTy, FramePtr, 0, Index, | FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); | ||||
CurrentValue->getName() + Twine(".spill.addr")); | Builder.CreateStore(Def, G); | ||||
Builder.CreateStore(CurrentValue, G); | |||||
} | |||||
} | |||||
// If we have not seen the use block, generate a reload in it. | BasicBlock *CurrentBlock = nullptr; | ||||
if (CurrentBlock != E.userBlock()) { | Value *CurrentReload = nullptr; | ||||
CurrentBlock = E.userBlock(); | for (auto *U : E.second) { | ||||
CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP); | // If we have not seen the use block, create a load instruction to reload | ||||
} | // the spilled value from the coroutine frame. Populates the Value pointer | ||||
// reference provided with the frame GEP. | |||||
// If we have a single edge PHINode, remove it and replace it with a reload | if (CurrentBlock != U->getParent()) { | ||||
// from the coroutine frame. (We already took care of multi edge PHINodes | CurrentBlock = U->getParent(); | ||||
// by rewriting them in the rewritePHIs function). | Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt()); | ||||
if (auto *PN = dyn_cast<PHINode>(E.user())) { | |||||
assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " | auto *GEP = GetFramePointer(E.first); | ||||
GEP->setName(E.first->getName() + Twine(".reload.addr")); | |||||
CurrentReload = Builder.CreateLoad( | |||||
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, | |||||
E.first->getName() + Twine(".reload")); | |||||
} | |||||
// If we have a single edge PHINode, remove it and replace it with a | |||||
// reload from the coroutine frame. (We already took care of multi edge | |||||
// PHINodes by rewriting them in the rewritePHIs function). | |||||
if (auto *PN = dyn_cast<PHINode>(U)) { | |||||
assert(PN->getNumIncomingValues() == 1 && | |||||
"unexpected number of incoming " | |||||
"values in the PHINode"); | "values in the PHINode"); | ||||
PN->replaceAllUsesWith(CurrentReload); | PN->replaceAllUsesWith(CurrentReload); | ||||
PN->eraseFromParent(); | PN->eraseFromParent(); | ||||
continue; | continue; | ||||
} | } | ||||
// If we have not seen this GEP instruction, migrate any dbg.declare from | // Replace all uses of CurrentValue in the current instruction with | ||||
ChuanqiXu: I'm confused about the comment. It says it would migrate dbg.declare from alloca. But how could it know the CurrentValue must be an alloca from the context?
lxfind: I think dbg.declare instructions were all generated for alloca instructions.
// the alloca to it. | // reload. | ||||
if (CurrentGEP != GEP) { | U->replaceUsesOfWith(Def, CurrentReload); | ||||
CurrentGEP = GEP; | |||||
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue); | |||||
if (!DIs.empty()) | |||||
DIBuilder(*CurrentBlock->getParent()->getParent(), | |||||
/*AllowUnresolved*/ false) | |||||
.insertDeclare(CurrentGEP, DIs.front()->getVariable(), | |||||
DIs.front()->getExpression(), | |||||
DIs.front()->getDebugLoc(), DIs.front()); | |||||
} | } | ||||
// Replace all uses of CurrentValue in the current instruction with reload. | |||||
E.user()->replaceUsesOfWith(CurrentValue, CurrentReload); | |||||
} | } | ||||
BasicBlock *FramePtrBB = FramePtr->getParent(); | BasicBlock *FramePtrBB = FramePtr->getParent(); | ||||
auto SpillBlock = | auto SpillBlock = | ||||
FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); | FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); | ||||
SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); | SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); | ||||
Shape.AllocaSpillBlock = SpillBlock; | Shape.AllocaSpillBlock = SpillBlock; | ||||
// retcon and retcon.once lowering assumes all uses have been sunk. | // retcon and retcon.once lowering assumes all uses have been sunk. | ||||
if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) { | if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) { | ||||
// If we found any allocas, replace all of their remaining uses with Geps. | // If we found any allocas, replace all of their remaining uses with Geps. | ||||
Builder.SetInsertPoint(&SpillBlock->front()); | Builder.SetInsertPoint(&SpillBlock->front()); | ||||
for (auto &P : Allocas) { | for (const auto &P : FrameData.Allocas) { | ||||
auto *G = GetFramePointer(P.second, P.first); | auto *G = GetFramePointer(P); | ||||
// We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) | // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) | ||||
// here, as we are changing location of the instruction. | // here, as we are changing location of the instruction. | ||||
G->takeName(P.first); | G->takeName(P); | ||||
P.first->replaceAllUsesWith(G); | P->replaceAllUsesWith(G); | ||||
P.first->eraseFromParent(); | P->eraseFromParent(); | ||||
} | } | ||||
return FramePtr; | return FramePtr; | ||||
} | } | ||||
// If we found any alloca, replace all of their remaining uses with GEP | // If we found any alloca, replace all of their remaining uses with GEP | ||||
// instructions. Because new dbg.declare have been created for these alloca, | // instructions. Because new dbg.declare have been created for these alloca, | ||||
// we also delete the original dbg.declare and replace other uses with undef. | // we also delete the original dbg.declare and replace other uses with undef. | ||||
// Note: We cannot replace the alloca with GEP instructions indiscriminately, | // Note: We cannot replace the alloca with GEP instructions indiscriminately, | ||||
// as some of the uses may not be dominated by CoroBegin. | // as some of the uses may not be dominated by CoroBegin. | ||||
bool MightNeedToCopy = false; | bool MightNeedToCopy = false; | ||||
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); | Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); | ||||
SmallVector<Instruction *, 4> UsersToUpdate; | SmallVector<Instruction *, 4> UsersToUpdate; | ||||
for (auto &P : Allocas) { | for (AllocaInst *A : FrameData.Allocas) { | ||||
AllocaInst *const A = P.first; | |||||
for (auto *DI : FindDbgDeclareUses(A)) | |||||
DI->eraseFromParent(); | |||||
replaceDbgUsesWithUndef(A); | |||||
UsersToUpdate.clear(); | UsersToUpdate.clear(); | ||||
for (User *U : A->users()) { | for (User *U : A->users()) { | ||||
auto *I = cast<Instruction>(U); | auto *I = cast<Instruction>(U); | ||||
if (DT.dominates(CB, I)) | if (DT.dominates(CB, I)) | ||||
UsersToUpdate.push_back(I); | UsersToUpdate.push_back(I); | ||||
else | else | ||||
MightNeedToCopy = true; | MightNeedToCopy = true; | ||||
} | } | ||||
if (!UsersToUpdate.empty()) { | if (!UsersToUpdate.empty()) { | ||||
auto *G = GetFramePointer(P.second, A); | auto *G = GetFramePointer(A); | ||||
G->takeName(A); | G->setName(A->getName() + Twine(".reload.addr")); | ||||
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(A); | |||||
if (!DIs.empty()) | |||||
DIBuilder(*A->getModule(), | |||||
/*AllowUnresolved*/ false) | |||||
.insertDeclare(G, DIs.front()->getVariable(), | |||||
DIs.front()->getExpression(), | |||||
DIs.front()->getDebugLoc(), DIs.front()); | |||||
for (auto *DI : FindDbgDeclareUses(A)) | |||||
DI->eraseFromParent(); | |||||
replaceDbgUsesWithUndef(A); | |||||
for (Instruction *I : UsersToUpdate) | for (Instruction *I : UsersToUpdate) | ||||
I->replaceUsesOfWith(A, G); | I->replaceUsesOfWith(A, G); | ||||
} | } | ||||
} | } | ||||
// If we discovered such uses not dominated by CoroBegin, see if any of them | // If we discovered such uses not dominated by CoroBegin, see if any of them | ||||
// preceed coro begin and have instructions that can modify the | // preceed coro begin and have instructions that can modify the | ||||
// value of the alloca and therefore would require a copying the value into | // value of the alloca and therefore would require a copying the value into | ||||
// the spill slot in the coroutine frame. | // the spill slot in the coroutine frame. | ||||
if (MightNeedToCopy) { | if (MightNeedToCopy) { | ||||
Builder.SetInsertPoint(FramePtr->getNextNode()); | Builder.SetInsertPoint(FramePtr->getNextNode()); | ||||
for (auto &P : Allocas) { | for (AllocaInst *A : FrameData.Allocas) { | ||||
AllocaInst *const A = P.first; | |||||
AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB); | AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB); | ||||
auto PtrI = Visitor.visitPtr(*A); | auto PtrI = Visitor.visitPtr(*A); | ||||
assert(!PtrI.isAborted()); | assert(!PtrI.isAborted()); | ||||
if (PtrI.isEscaped()) { | if (PtrI.isEscaped()) { | ||||
// isEscaped really means potentially modified before CoroBegin. | // isEscaped really means potentially modified before CoroBegin. | ||||
if (A->isArrayAllocation()) | if (A->isArrayAllocation()) | ||||
report_fatal_error( | report_fatal_error( | ||||
"Coroutines cannot handle copying of array allocas yet"); | "Coroutines cannot handle copying of array allocas yet"); | ||||
auto *G = GetFramePointer(P.second, A); | auto *G = GetFramePointer(A); | ||||
auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); | auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); | ||||
Builder.CreateStore(Value, G); | Builder.CreateStore(Value, G); | ||||
} | } | ||||
// For each alias to Alloca created before CoroBegin but used after | // For each alias to Alloca created before CoroBegin but used after | ||||
// CoroBegin, we recreate them after CoroBegin by appplying the offset | // CoroBegin, we recreate them after CoroBegin by appplying the offset | ||||
// to the pointer in the frame. | // to the pointer in the frame. | ||||
for (const auto &Alias : Visitor.getAliases()) { | for (const auto &Alias : Visitor.getAliases()) { | ||||
auto *FramePtr = GetFramePointer(P.second, A); | auto *FramePtr = GetFramePointer(A); | ||||
auto *FramePtrRaw = | auto *FramePtrRaw = | ||||
Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); | Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); | ||||
auto *AliasPtr = Builder.CreateGEP( | auto *AliasPtr = Builder.CreateGEP( | ||||
FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); | FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); | ||||
auto *AliasPtrTyped = | auto *AliasPtrTyped = | ||||
Builder.CreateBitCast(AliasPtr, Alias.first->getType()); | Builder.CreateBitCast(AliasPtr, Alias.first->getType()); | ||||
Alias.first->replaceUsesWithIf( | Alias.first->replaceUsesWithIf( | ||||
AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); | AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); | ||||
static bool isCoroutineStructureIntrinsic(Instruction &I) { | static bool isCoroutineStructureIntrinsic(Instruction &I) { | ||||
return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) || | return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) || | ||||
isa<CoroSuspendInst>(&I); | isa<CoroSuspendInst>(&I); | ||||
} | } | ||||
// For every use of the value that is across suspend point, recreate that value | // For every use of the value that is across suspend point, recreate that value | ||||
// after a suspend point. | // after a suspend point. | ||||
static void rewriteMaterializableInstructions(IRBuilder<> &IRB, | static void rewriteMaterializableInstructions(IRBuilder<> &IRB, | ||||
SpillInfo const &Spills) { | const SpillInfo &Spills) { | ||||
for (const auto &E : Spills) { | |||||
Value *Def = E.first; | |||||
BasicBlock *CurrentBlock = nullptr; | BasicBlock *CurrentBlock = nullptr; | ||||
Instruction *CurrentMaterialization = nullptr; | Instruction *CurrentMaterialization = nullptr; | ||||
Instruction *CurrentDef = nullptr; | for (Instruction *U : E.second) { | ||||
for (auto const &E : Spills) { | |||||
// If it is a new definition, update CurrentXXX variables. | |||||
if (CurrentDef != E.def()) { | |||||
CurrentDef = cast<Instruction>(E.def()); | |||||
CurrentBlock = nullptr; | |||||
CurrentMaterialization = nullptr; | |||||
} | |||||
// If we have not seen this block, materialize the value. | // If we have not seen this block, materialize the value. | ||||
if (CurrentBlock != E.userBlock()) { | if (CurrentBlock != U->getParent()) { | ||||
CurrentBlock = E.userBlock(); | CurrentBlock = U->getParent(); | ||||
CurrentMaterialization = cast<Instruction>(CurrentDef)->clone(); | CurrentMaterialization = cast<Instruction>(Def)->clone(); | ||||
CurrentMaterialization->setName(CurrentDef->getName()); | CurrentMaterialization->setName(Def->getName()); | ||||
CurrentMaterialization->insertBefore( | CurrentMaterialization->insertBefore( | ||||
&*CurrentBlock->getFirstInsertionPt()); | &*CurrentBlock->getFirstInsertionPt()); | ||||
} | } | ||||
if (auto *PN = dyn_cast<PHINode>(U)) { | |||||
if (auto *PN = dyn_cast<PHINode>(E.user())) { | assert(PN->getNumIncomingValues() == 1 && | ||||
assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " | "unexpected number of incoming " | ||||
"values in the PHINode"); | "values in the PHINode"); | ||||
PN->replaceAllUsesWith(CurrentMaterialization); | PN->replaceAllUsesWith(CurrentMaterialization); | ||||
PN->eraseFromParent(); | PN->eraseFromParent(); | ||||
continue; | continue; | ||||
} | } | ||||
// Replace all uses of Def in the current instruction with the | |||||
// Replace all uses of CurrentDef in the current instruction with the | |||||
// CurrentMaterialization for the block. | // CurrentMaterialization for the block. | ||||
E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization); | U->replaceUsesOfWith(Def, CurrentMaterialization); | ||||
} | |||||
} | } | ||||
} | } | ||||
// Splits the block at a particular instruction unless it is the first | // Splits the block at a particular instruction unless it is the first | ||||
// instruction in the block with a single predecessor. | // instruction in the block with a single predecessor. | ||||
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { | static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { | ||||
auto *BB = I->getParent(); | auto *BB = I->getParent(); | ||||
if (&BB->front() == I) { | if (&BB->front() == I) { | ||||
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
if (!AllocasToPromote.empty()) { | if (!AllocasToPromote.empty()) { | ||||
DominatorTree DT(F); | DominatorTree DT(F); | ||||
PromoteMemToReg(AllocasToPromote, DT); | PromoteMemToReg(AllocasToPromote, DT); | ||||
} | } | ||||
} | } | ||||
/// retcon and retcon.once conventions assume that all spill uses can be sunk | /// retcon and retcon.once conventions assume that all spill uses can be sunk | ||||
/// after the coro.begin intrinsic. | /// after the coro.begin intrinsic. | ||||
static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, | static void sinkSpillUsesAfterCoroBegin(Function &F, | ||||
const FrameDataInfo &FrameData, | |||||
CoroBeginInst *CoroBegin) { | CoroBeginInst *CoroBegin) { | ||||
DominatorTree Dom(F); | DominatorTree Dom(F); | ||||
SmallSetVector<Instruction *, 32> ToMove; | SmallSetVector<Instruction *, 32> ToMove; | ||||
SmallVector<Instruction *, 32> Worklist; | SmallVector<Instruction *, 32> Worklist; | ||||
// Collect all users that precede coro.begin. | // Collect all users that precede coro.begin. | ||||
for (auto const &Entry : Spills) { | for (auto *Def : FrameData.getAllDefs()) { | ||||
auto *SpillDef = Entry.def(); | for (User *U : Def->users()) { | ||||
for (User *U : SpillDef->users()) { | |||||
auto Inst = cast<Instruction>(U); | auto Inst = cast<Instruction>(U); | ||||
if (Inst->getParent() != CoroBegin->getParent() || | if (Inst->getParent() != CoroBegin->getParent() || | ||||
Dom.dominates(CoroBegin, Inst)) | Dom.dominates(CoroBegin, Inst)) | ||||
continue; | continue; | ||||
if (ToMove.insert(Inst)) | if (ToMove.insert(Inst)) | ||||
Worklist.push_back(Inst); | Worklist.push_back(Inst); | ||||
} | } | ||||
} | } | ||||
for (BasicBlock *DomBB : DomSet) {
S->eraseFromParent(); | S->eraseFromParent(); | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
static void collectFrameAllocas(Function &F, coro::Shape &Shape, | |||||
SuspendCrossingInfo &Checker, | |||||
SmallVectorImpl<AllocaInst *> &Allocas) { | |||||
// Collect lifetime.start info for each alloca. | |||||
using LifetimeStart = SmallPtrSet<Instruction *, 2>; | |||||
llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap; | |||||
for (Instruction &I : instructions(F)) { | |||||
auto *II = dyn_cast<IntrinsicInst>(&I); | |||||
if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) | |||||
continue; | |||||
if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) { | |||||
if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) { | |||||
if (LifetimeMap.find(AI) == LifetimeMap.end()) | |||||
LifetimeMap[AI] = std::make_unique<LifetimeStart>(); | |||||
LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst); | |||||
} | |||||
} | |||||
} | |||||
for (Instruction &I : instructions(F)) { | |||||
auto *AI = dyn_cast<AllocaInst>(&I); | |||||
if (!AI) | |||||
continue; | |||||
// The PromiseAlloca will be specially handled since it needs to be in a | |||||
// fixed position in the frame. | |||||
if (AI == Shape.SwitchLowering.PromiseAlloca) { | |||||
continue; | |||||
} | |||||
auto Iter = LifetimeMap.find(AI); | |||||
for (User *U : I.users()) { | |||||
bool ShouldLiveOnFrame = false; | |||||
// Check against lifetime.start if the instruction has the info. | |||||
if (Iter != LifetimeMap.end()) | |||||
for (auto *S : *Iter->second) { | |||||
if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) | |||||
break; | |||||
} | |||||
else | |||||
ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(I, U); | |||||
rjmccall: I see that this seems to be the existing algorithm, and I know this is intended to be a refactoring patch rather than a functional one, so I'll let this go without comment beyond expressing my eagerness to see a follow-up. :)
if (ShouldLiveOnFrame) { | |||||
Allocas.push_back(AI); | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
void coro::buildCoroutineFrame(Function &F, Shape &Shape) { | void coro::buildCoroutineFrame(Function &F, Shape &Shape) { | ||||
eliminateSwiftError(F, Shape); | eliminateSwiftError(F, Shape); | ||||
if (Shape.ABI == coro::ABI::Switch && | if (Shape.ABI == coro::ABI::Switch && | ||||
Shape.SwitchLowering.PromiseAlloca) { | Shape.SwitchLowering.PromiseAlloca) { | ||||
Shape.getSwitchCoroId()->clearPromise(); | Shape.getSwitchCoroId()->clearPromise(); | ||||
} | } | ||||
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will | // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will | ||||
// never has its definition separated from the PHI by the suspend point. | // never has its definition separated from the PHI by the suspend point. | ||||
rewritePHIs(F); | rewritePHIs(F); | ||||
// Build suspend crossing info. | // Build suspend crossing info. | ||||
SuspendCrossingInfo Checker(F, Shape); | SuspendCrossingInfo Checker(F, Shape); | ||||
IRBuilder<> Builder(F.getContext()); | IRBuilder<> Builder(F.getContext()); | ||||
SpillInfo Spills; | FrameDataInfo FrameData; | ||||
SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas; | SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas; | ||||
SmallVector<Instruction*, 4> DeadInstructions; | SmallVector<Instruction*, 4> DeadInstructions; | ||||
{ | |||||
SpillInfo Spills; | |||||
for (int Repeat = 0; Repeat < 4; ++Repeat) { | for (int Repeat = 0; Repeat < 4; ++Repeat) { | ||||
// See if there are materializable instructions across suspend points. | // See if there are materializable instructions across suspend points. | ||||
for (Instruction &I : instructions(F)) | for (Instruction &I : instructions(F)) | ||||
if (materializable(I)) | if (materializable(I)) | ||||
for (User *U : I.users()) | for (User *U : I.users()) | ||||
if (Checker.isDefinitionAcrossSuspend(I, U)) | if (Checker.isDefinitionAcrossSuspend(I, U)) | ||||
Spills.emplace_back(&I, U); | Spills[&I].push_back(cast<Instruction>(U)); | ||||
if (Spills.empty()) | if (Spills.empty()) | ||||
break; | break; | ||||
// Rewrite materializable instructions to be materialized at the use point. | // Rewrite materializable instructions to be materialized at the use | ||||
LLVM_DEBUG(dump("Materializations", Spills)); | // point. | ||||
LLVM_DEBUG(dumpSpills("Materializations", Spills)); | |||||
rewriteMaterializableInstructions(Builder, Spills); | rewriteMaterializableInstructions(Builder, Spills); | ||||
Spills.clear(); | Spills.clear(); | ||||
} | } | ||||
} | |||||
sinkLifetimeStartMarkers(F, Shape, Checker); | sinkLifetimeStartMarkers(F, Shape, Checker); | ||||
// Collect lifetime.start info for each alloca. | collectFrameAllocas(F, Shape, Checker, FrameData.Allocas); | ||||
using LifetimeStart = SmallPtrSet<Instruction *, 2>; | LLVM_DEBUG(dumpAllocas(FrameData.Allocas)); | ||||
llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap; | |||||
for (Instruction &I : instructions(F)) { | |||||
auto *II = dyn_cast<IntrinsicInst>(&I); | |||||
if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) | |||||
continue; | |||||
if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) { | |||||
if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) { | |||||
if (LifetimeMap.find(AI) == LifetimeMap.end()) | |||||
LifetimeMap[AI] = std::make_unique<LifetimeStart>(); | |||||
LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst); | |||||
} | |||||
} | |||||
} | |||||
// Collect the spills for arguments and other not-materializable values. | // Collect the spills for arguments and other not-materializable values. | ||||
for (Argument &A : F.args()) | for (Argument &A : F.args()) | ||||
for (User *U : A.users()) | for (User *U : A.users()) | ||||
if (Checker.isDefinitionAcrossSuspend(A, U)) | if (Checker.isDefinitionAcrossSuspend(A, U)) | ||||
Spills.emplace_back(&A, U); | FrameData.Spills[&A].push_back(cast<Instruction>(U)); | ||||
for (Instruction &I : instructions(F)) { | for (Instruction &I : instructions(F)) { | ||||
// Values returned from coroutine structure intrinsics should not be part | // Values returned from coroutine structure intrinsics should not be part | ||||
// of the Coroutine Frame. | // of the Coroutine Frame. | ||||
if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) | if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) | ||||
continue; | continue; | ||||
// The Coroutine Promise is always included in the coroutine frame; no need to | // The Coroutine Promise is always included in the coroutine frame; no need to | ||||
Show All 14 Lines | if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) { | ||||
// the rewritten value. The rewrite doesn't invalidate anything in | // the rewritten value. The rewrite doesn't invalidate anything in | ||||
// Spills because the other alloca intrinsics have no other operands | // Spills because the other alloca intrinsics have no other operands | ||||
// besides AI, and it doesn't invalidate the iteration because we delay | // besides AI, and it doesn't invalidate the iteration because we delay | ||||
// erasing AI. | // erasing AI. | ||||
auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); | auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); | ||||
for (User *U : Alloc->users()) { | for (User *U : Alloc->users()) { | ||||
if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) | if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) | ||||
Spills.emplace_back(Alloc, U); | FrameData.Spills[Alloc].push_back(cast<Instruction>(U)); | ||||
} | } | ||||
continue; | continue; | ||||
} | } | ||||
// Ignore alloca.get; we process this as part of coro.alloca.alloc. | // Ignore alloca.get; we process this as part of coro.alloca.alloc. | ||||
if (isa<CoroAllocaGetInst>(I)) { | if (isa<CoroAllocaGetInst>(I)) | ||||
continue; | continue; | ||||
} | |||||
auto Iter = LifetimeMap.find(&I); | |||||
for (User *U : I.users()) { | |||||
bool NeedSpill = false; | |||||
// Check against lifetime.start if the instruction has the info. | if (isa<AllocaInst>(I)) | ||||
if (Iter != LifetimeMap.end()) | continue; | ||||
for (auto *S : *Iter->second) { | |||||
if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U))) | |||||
break; | |||||
} | |||||
else | |||||
NeedSpill = Checker.isDefinitionAcrossSuspend(I, U); | |||||
if (NeedSpill) { | for (User *U : I.users()) | ||||
if (Checker.isDefinitionAcrossSuspend(I, U)) { | |||||
// We cannot spill a token. | // We cannot spill a token. | ||||
if (I.getType()->isTokenTy()) | if (I.getType()->isTokenTy()) | ||||
report_fatal_error( | report_fatal_error( | ||||
"token definition is separated from the use by a suspend point"); | "token definition is separated from the use by a suspend point"); | ||||
Spills.emplace_back(&I, U); | FrameData.Spills[&I].push_back(cast<Instruction>(U)); | ||||
} | |||||
} | } | ||||
} | } | ||||
LLVM_DEBUG(dump("Spills", Spills)); | LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills)); | ||||
if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) | if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) | ||||
sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); | sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin); | ||||
Shape.FrameTy = buildFrameType(F, Shape, Spills); | Shape.FrameTy = buildFrameType(F, Shape, FrameData); | ||||
Shape.FramePtr = insertSpills(Spills, Shape); | // Add PromiseAlloca to Allocas list so that it is processed in insertSpills. | ||||
if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) | |||||
FrameData.Allocas.push_back(Shape.SwitchLowering.PromiseAlloca); | |||||
Shape.FramePtr = insertSpills(FrameData, Shape); | |||||
lowerLocalAllocas(LocalAllocas, DeadInstructions); | lowerLocalAllocas(LocalAllocas, DeadInstructions); | ||||
for (auto I : DeadInstructions) | for (auto I : DeadInstructions) | ||||
I->eraseFromParent(); | I->eraseFromParent(); | ||||
} | } |
Well, it needs to be explicitly added here because it's a header field which has to have a fixed offset based on its alignment, which is *why* it's not in FrameData.Allocas.
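To illustrate the layout constraint behind this, here is a rough C++ sketch of the switch-lowering frame header. The struct, template parameter, and field names are invented for illustration; the real frame is an LLVM StructType assembled by FrameTypeBuilder, and the real index width depends on the number of suspend points.

#include <cstdint>

// Hypothetical picture of the switch-ABI coroutine frame (illustrative only).
template <typename PromiseType> struct CoroFrameSketch {
  void (*ResumeFn)(void *);  // header field 0: resume function pointer
  void (*DestroyFn)(void *); // header field 1: destroy function pointer
  PromiseType Promise;       // header field: placed immediately after the two
                             // pointers, padded only to the promise's own
                             // alignment, so llvm.coro.promise can recompute
                             // its offset from the frame pointer and that
                             // alignment alone
  uint32_t SuspendIndex;     // selects which suspend point to resume at
  // ... spilled values and the remaining allocas follow, in whatever order the
  //     layout algorithm chooses ...
};

Because the promise's offset has to stay computable from its alignment alone, it cannot be reordered along with the rest of FrameData.Allocas; it is laid out as a header field in buildFrameType and, per the comment at the call site, only appended to Allocas afterwards so that insertSpills rewrites its uses like any other alloca.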