diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -429,12 +429,15 @@ Align StructAlign; bool IsFinished = false; + Optional MaxFrameAlignment; + SmallVector Fields; DenseMap FieldIndexByKey; public: - FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) - : DL(DL), Context(Context) {} + FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL, + Optional MaxFrameAlignment) + : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {} /// Add a field to this structure for the storage of an `alloca` /// instruction. @@ -485,7 +488,8 @@ /// Add a field to this structure. LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, - bool IsHeader = false) { + bool IsHeader = false, + bool IsSpillOfValue = false) { assert(!IsFinished && "adding fields to a finished builder"); assert(Ty && "must provide a type for a field"); @@ -500,8 +504,16 @@ // The field alignment might not be the type alignment, but we need // to remember the type alignment anyway to build the type. - Align TyAlignment = DL.getABITypeAlign(Ty); - if (!FieldAlignment) FieldAlignment = TyAlignment; + // If we are spilling values we don't need to worry about ABI alignment + // concerns. + auto ABIAlign = DL.getABITypeAlign(Ty); + Align TyAlignment = + (IsSpillOfValue && MaxFrameAlignment) + ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign) + : ABIAlign; + if (!FieldAlignment) { + FieldAlignment = TyAlignment; + } // Lay out header fields immediately. uint64_t Offset; @@ -1089,7 +1101,11 @@ return StructType::create(C, Name); }(); - FrameTypeBuilder B(C, DL); + // We will use this value to cap the alignment of spilled values. + Optional MaxFrameAlignment; + if (Shape.ABI == coro::ABI::Async) + MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment(); + FrameTypeBuilder B(C, DL, MaxFrameAlignment); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); Optional SwitchIndexFieldId; @@ -1142,7 +1158,8 @@ if (const Argument *A = dyn_cast(S.first)) if (A->hasByValAttr()) FieldType = FieldType->getPointerElementType(); - FieldIDType Id = B.addField(FieldType, None); + FieldIDType Id = + B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/); FrameData.setFieldIndex(S.first, Id); } @@ -1545,6 +1562,7 @@ for (auto const &E : FrameData.Spills) { Value *Def = E.first; + auto TyAlignment = Align(FrameData.getAlign(Def)); // Create a store instruction storing the value into the // coroutine frame. Instruction *InsertPt = nullptr; @@ -1601,9 +1619,9 @@ // instead of the pointer itself. auto *Value = Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); - Builder.CreateStore(Value, G); + Builder.CreateAlignedStore(Value, G, TyAlignment); } else { - Builder.CreateStore(Def, G); + Builder.CreateAlignedStore(Def, G, TyAlignment); } BasicBlock *CurrentBlock = nullptr; @@ -1621,9 +1639,9 @@ if (NeedToCopyArgPtrValue) CurrentReload = GEP; else - CurrentReload = Builder.CreateLoad( + CurrentReload = Builder.CreateAlignedLoad( FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, - E.first->getName() + Twine(".reload")); + TyAlignment, E.first->getName() + Twine(".reload")); TinyPtrVector DIs = FindDbgDeclareUses(Def); for (DbgDeclareInst *DDI : DIs) { diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck --check-prefixes=CHECK %s -; RUN: opt < %s -enable-coroutines -O0 -S +; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s target datalayout = "p:64:64:64" %async.task = type { i64 } @@ -57,6 +57,7 @@ define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) !dbg !1 { entry: %tmp = alloca { i64, i64 }, align 8 + %vector = alloca <4 x double>, align 16 %proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0 %proj.2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 1 @@ -88,6 +89,7 @@ store i8* %async.ctxt, i8** %callee_context.caller_context.addr %resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8* %callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8* + %vector_spill = load <4 x double>, <4 x double>* %vector, align 16 %res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0, i8* %resume.func_ptr, i8* %resume_proj_fun, @@ -101,7 +103,7 @@ call void @some_user(i64 %val) %val.2 = load i64, i64* %proj.2 call void @some_user(i64 %val.2) - + store <4 x double> %vector_spill, <4 x double>* %vector, align 16 tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor) call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0) unreachable @@ -119,6 +121,7 @@ ; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } ; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) +; CHECK-O0-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] { ; CHECK: coro.return: ; CHECK: [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128 @@ -144,16 +147,23 @@ ; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_function.resume.0 to i8*), i8** [[RETURN_TO_CALLER_ADDR]] ; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* [[CALLEE_CTXT]] to i8** ; CHECK: store i8* %async.ctxt, i8** [[CALLER_CONTEXT_ADDR]] +; Make sure the spill is underaligned to the max context alignment (16). +; CHECK-O0: [[VECTOR_SPILL:%.*]] = load <4 x double>, <4 x double>* {{.*}} +; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1 +; CHECK-O0: store <4 x double> [[VECTOR_SPILL]], <4 x double>* [[VECTOR_SPILL_ADDR]], align 16 ; CHECK: tail call swiftcc void @asyncSuspend(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor) ; CHECK: ret void ; CHECK: } ; CHECK-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* nocapture readonly swiftasync %0, i8* %1, i8* nocapture readnone %2) +; CHECK-O0-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* swiftasync %0, i8* %1, i8* %2) ; CHECK-SAME: !dbg ![[SP2:[0-9]+]] { ; CHECK: entryresume.0: ; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* %0 to i8** ; CHECK: [[CALLER_CONTEXT:%.*]] = load i8*, i8** [[CALLER_CONTEXT_ADDR]] ; CHECK: [[FRAME_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 128 +; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1 +; CHECK-O0: load <4 x double>, <4 x double>* [[VECTOR_SPILL_ADDR]], align 16 ; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 160 ; CHECK: [[CAST1:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8** ; CHECK: [[CALLEE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CAST1]]