Index: lib/Transforms/Coroutines/CoroFrame.cpp =================================================================== --- lib/Transforms/Coroutines/CoroFrame.cpp +++ lib/Transforms/Coroutines/CoroFrame.cpp @@ -270,11 +270,22 @@ Instruction *user() const { return second; } BasicBlock *userBlock() const { return second->getParent(); } - std::pair getKey() const { - return {def(), userBlock()}; + // Note that field index is stored in the first SpillEntry for a particular + // definition. Subsequent mentions of a defintion do not have fieldNo + // assigned. This works out fine as the users of Spills capture the info about + // the definition the first time they encounter it. Consider refactoring + // SpillInfo into two arrays to normalize the spill representation. + unsigned field() const { + assert(FieldNo && "Accessing unassigned field"); + return FieldNo; + } + void setField(unsigned FieldNumber) { + assert(!FieldNo && "Reassigning field number"); + FieldNo = FieldNumber; } - bool operator<(Spill const &rhs) const { return getKey() < rhs.getKey(); } +private: + unsigned FieldNo = 0; }; // Note that there may be more than one record with the same value of Def in @@ -296,6 +307,55 @@ } #endif +// We cannot rely solely on natural alignment of a type when building a +// coroutine frame and if the alignment specified on the Alloca instruciton +// differs from the natural alignment of the alloca type we will need to insert +// padding. +struct PaddingCalculator { + const DataLayout &DL; + LLVMContext &Context; + unsigned StructSize = 0; + + PaddingCalculator(LLVMContext &Context, DataLayout const &DL) + : DL(DL), Context(Context) {} + + // Replicate the logic from IR/DataLayout.cpp to match field offset + // computation for LLVM structs. + void addType(Type *Ty) { + unsigned TyAlign = DL.getABITypeAlignment(Ty); + if ((StructSize & (TyAlign - 1)) != 0) + StructSize = alignTo(StructSize, TyAlign); + + StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item. + } + + void addTypes(SmallVectorImpl const &Types) { + for (auto *Ty : Types) + addType(Ty); + } + + unsigned computePadding(Type *Ty, unsigned ForcedAlignment) { + unsigned TyAlign = DL.getABITypeAlignment(Ty); + auto Natural = alignTo(StructSize, TyAlign); + auto Forced = alignTo(StructSize, ForcedAlignment); + + // Return how many bytes of padding we need to insert. + if (Natural != Forced) + return std::max(Natural, Forced) - StructSize; + + // Rely on natural alignment. + return 0; + } + + // If padding required, return the padding field type to insert. + ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) { + if (auto Padding = computePadding(Ty, ForcedAlignment)) + return ArrayType::get(Type::getInt8Ty(Context), Padding); + + return nullptr; + } +}; + // Build a struct that will keep state for an active coroutine. // struct f.frame { // ResumeFnTy ResumeFnAddr; @@ -307,6 +367,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, SpillInfo &Spills) { LLVMContext &C = F.getContext(); + const DataLayout &DL = F.getParent()->getDataLayout(); + PaddingCalculator Padder(C, DL); SmallString<32> Name(F.getName()); Name.append(".Frame"); StructType *FrameTy = StructType::create(C, Name); @@ -324,8 +386,10 @@ Type::getIntNTy(C, IndexBits)}; Value *CurrentDef = nullptr; + Padder.addTypes(Types); + // Create an entry for every spilled value. - for (auto const &S : Spills) { + for (auto &S : Spills) { if (CurrentDef == S.def()) continue; @@ -335,12 +399,23 @@ continue; Type *Ty = nullptr; - if (auto *AI = dyn_cast(CurrentDef)) + if (auto *AI = dyn_cast(CurrentDef)) { Ty = AI->getAllocatedType(); - else + if (unsigned AllocaAlignment = AI->getAlignment()) { + // If alignment is specified in alloca, see if we need to insert extra + // padding. + if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) { + Types.push_back(PaddingTy); + Padder.addType(PaddingTy); + } + } + } else Ty = CurrentDef->getType(); + S.setField(Types.size()); + Types.push_back(Ty); + Padder.addType(Ty); } FrameTy->setBody(Types); @@ -380,7 +455,7 @@ Value *CurrentValue = nullptr; BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; - unsigned Index = coro::Shape::LastKnownField; + unsigned Index = 0; // We need to keep track of any allocas that need "spilling" // since they will live in the coroutine frame now, all access to them @@ -395,6 +470,7 @@ // Create a load instruction to reload the spilled value from the coroutine // frame. auto CreateReload = [&](Instruction *InsertBefore) { + assert(Index && "accessing unassigned field number"); Builder.SetInsertPoint(InsertBefore); auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index, CurrentValue->getName() + @@ -411,8 +487,7 @@ CurrentValue = E.def(); CurrentBlock = nullptr; CurrentReload = nullptr; - - ++Index; + Index = E.field(); if (auto *AI = dyn_cast(CurrentValue)) { // Spilled AllocaInst will be replaced with GEP from the coroutine frame @@ -565,7 +640,7 @@ // loop: // %n.val = phi i32[%n, %entry], [%inc, %loop] // - // It will create: + // It will create: // // loop.from.entry: // %n.loop.pre = phi i32 [%n, %entry] @@ -789,7 +864,6 @@ break; // Rewrite materializable instructions to be materialized at the use point. - std::sort(Spills.begin(), Spills.end()); DEBUG(dump("Materializations", Spills)); rewriteMaterializableInstructions(Builder, Spills); Spills.clear(); @@ -820,7 +894,6 @@ Spills.emplace_back(&I, U); } } - std::sort(Spills.begin(), Spills.end()); DEBUG(dump("Spills", Spills)); moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); Shape.FrameTy = buildFrameType(F, Shape, Spills); Index: lib/Transforms/Coroutines/CoroInternal.h =================================================================== --- lib/Transforms/Coroutines/CoroInternal.h +++ lib/Transforms/Coroutines/CoroInternal.h @@ -76,7 +76,6 @@ DestroyField, PromiseField, IndexField, - LastKnownField = IndexField }; StructType *FrameTy; Index: test/Transforms/Coroutines/coro-padding.ll =================================================================== --- /dev/null +++ test/Transforms/Coroutines/coro-padding.ll @@ -0,0 +1,61 @@ +; Check that we will insert the correct padding if natural alignment of the +; spilled data does not match the alignment specified in alloca instruction. +; RUN: opt < %s -coro-split -S | FileCheck %s + +%PackedStruct = type <{ i64 }> + +declare void @consume(%PackedStruct*) + +define i8* @f() "coroutine.presplit"="1" { +entry: + %data = alloca %PackedStruct, align 8 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @consume(%PackedStruct* %data) + %0 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %0, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @consume(%PackedStruct* %data) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; See if the padding was inserted before PackedStruct +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, [6 x i8], %PackedStruct } + +; See if we used correct index to access packed struct (padding is field 4) +; CHECK-LABEL: @f( +; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK-NEXT: call void @consume(%PackedStruct* %[[DATA]]) +; CHECK: ret i8* + +; See if we used correct index to access packed struct (padding is field 4) +; CHECK-LABEL: @f.resume( +; CHECK: %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK-NEXT: call void @consume(%PackedStruct* %[[DATA]]) +; CHECK: ret void + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare double @print(double) +declare void @free(i8*)