Index: llvm/trunk/docs/LangRef.rst =================================================================== --- llvm/trunk/docs/LangRef.rst +++ llvm/trunk/docs/LangRef.rst @@ -792,7 +792,10 @@ An argument allocation may be used by a call at most once because the call may deallocate it. The ``inalloca`` attribute cannot be used in conjunction with other attributes that affect argument - storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. + storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The + ``inalloca`` attribute also disables LLVM's implicit lowering of + large aggregate return values, which means that frontend authors + must lower them with ``sret`` pointers. When the call site is reached, the argument allocation must have been the most recent stack allocation that is still live, or the Index: llvm/trunk/include/llvm/Support/CallSite.h =================================================================== --- llvm/trunk/include/llvm/Support/CallSite.h +++ llvm/trunk/include/llvm/Support/CallSite.h @@ -268,9 +268,10 @@ paramHasAttr(ArgNo + 1, Attribute::InAlloca); } - /// @brief Determine if there are any inalloca arguments. + /// @brief Determine if there is an inalloca argument. Only the last + /// argument can have the inalloca attribute. 
bool hasInAllocaArgument() const { - return getAttributes().hasAttrSomewhere(Attribute::InAlloca); + return paramHasAttr(arg_size(), Attribute::InAlloca); } bool doesNotAccessMemory(unsigned ArgNo) const { Index: llvm/trunk/include/llvm/Target/TargetCallingConv.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetCallingConv.h +++ llvm/trunk/include/llvm/Target/TargetCallingConv.h @@ -42,6 +42,8 @@ static const uint64_t ByValAlignOffs = 7; static const uint64_t Split = 1ULL<<11; static const uint64_t SplitOffs = 11; + static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca + static const uint64_t InAllocaOffs = 12; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size @@ -68,6 +70,9 @@ bool isByVal() const { return Flags & ByVal; } void setByVal() { Flags |= One << ByValOffs; } + bool isInAlloca() const { return Flags & InAlloca; } + void setInAlloca() { Flags |= One << InAllocaOffs; } + bool isNest() const { return Flags & Nest; } void setNest() { Flags |= One << NestOffs; } Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -609,8 +609,9 @@ return getValueType(Ty, AllowUnknown).getSimpleVT(); } - /// Return the desired alignment for ByVal aggregate function arguments in the - /// caller parameter area. This is the actual alignment, not its logarithm. + /// Return the desired alignment for ByVal or InAlloca aggregate function + /// arguments in the caller parameter area. This is the actual alignment, not + /// its logarithm. virtual unsigned getByValTypeAlignment(Type *Ty) const; /// Return the type of registers that this ValueType will eventually require. 
@@ -1965,12 +1966,13 @@ bool isSRet : 1; bool isNest : 1; bool isByVal : 1; + bool isInAlloca : 1; bool isReturned : 1; uint16_t Alignment; ArgListEntry() : isSExt(false), isZExt(false), isInReg(false), - isSRet(false), isNest(false), isByVal(false), isReturned(false), - Alignment(0) { } + isSRet(false), isNest(false), isByVal(false), isInAlloca(false), + isReturned(false), Alignment(0) { } void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx); }; Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5434,6 +5434,8 @@ int DemoteStackIdx = -100; if (!CanLowerReturn) { + assert(!CS.hasInAllocaArgument() && + "sret demotion is incompatible with inalloca"); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( @@ -7142,8 +7144,18 @@ Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. 
+ Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7362,8 +7374,18 @@ Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -75,6 +75,7 @@ isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); Alignment = CS->getParamAlignment(AttrIdx); } Index: llvm/trunk/lib/IR/Mangler.cpp =================================================================== --- llvm/trunk/lib/IR/Mangler.cpp +++ llvm/trunk/lib/IR/Mangler.cpp @@ -65,8 +65,8 @@ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { Type *Ty = 
AI->getType(); - // 'Dereference' type in case of byval parameter attribute - if (AI->hasByValAttr()) + // 'Dereference' type in case of byval or inalloca parameter attribute. + if (AI->hasByValOrInAllocaAttr()) Ty = cast(Ty)->getElementType(); // Size should be aligned to DWORD boundary ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; Index: llvm/trunk/lib/Target/X86/X86FastISel.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp @@ -1910,6 +1910,10 @@ if (isVarArg && isWin64) return false; + // Don't know about inalloca yet. + if (CS.hasInAllocaArgument()) + return false; + // Fast-isel doesn't know about callee-pop yet. if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, TM.Options.GuaranteedTailCallOpt)) Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -2584,9 +2584,21 @@ X86Info->setTCReturnAddrDelta(FPDiff); } + unsigned NumBytesToPush = NumBytes; + unsigned NumBytesToPop = NumBytes; + + // If we have an inalloca argument, all stack space has already been allocated + // for us and will be right at the top of the stack. We don't support multiple + // arguments passed in memory when using inalloca. + if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { + NumBytesToPush = 0; + assert(ArgLocs.back().getLocMemOffset() == 0 && + "an inalloca argument must be the only memory argument"); + } + if (!IsSibcall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), - dl); + Chain = DAG.getCALLSEQ_START( + Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl); SDValue RetAddrFrIdx; // Load return address for tail calls. 
@@ -2603,10 +2615,14 @@ const X86RegisterInfo *RegInfo = static_cast(getTargetMachine().getRegisterInfo()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + // Skip inalloca arguments, they have already been written. + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (Flags.isInAlloca()) + continue; + CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; bool isByVal = Flags.isByVal(); // Promote the value if needed. @@ -2873,8 +2889,9 @@ SmallVector Ops; if (!IsSibcall && isTailCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NumBytesToPop, true), + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -2931,7 +2948,7 @@ // Returns a flag for retval copy to use. if (!IsSibcall) { Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(NumBytesToPop, true), DAG.getIntPtrConstant(NumBytesForCalleeToPop, true), InFlag, dl); Index: llvm/trunk/test/CodeGen/X86/inalloca-ctor.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-ctor.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-ctor.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Foo = type { i32, i32 } + +%frame = type { %Foo, i32, %Foo } + +declare void @f(%frame* inalloca %a) + +declare void @Foo_ctor(%Foo* %this) + +define void @g() { +entry: + %args = alloca %frame, inalloca + %c = getelementptr %frame* %args, i32 0, i32 2 +; CHECK: movl $20, %eax +; CHECK: calll __chkstk +; CHECK: movl %esp, + call void @Foo_ctor(%Foo* %c) +; CHECK: leal 12(%{{.*}}), +; CHECK: subl $4, %esp +; CHECK: calll _Foo_ctor +; CHECK: addl $4, %esp + %b = getelementptr %frame* %args, i32 0, i32 1 + store i32 42, i32* %b +; CHECK: movl $42, + %a = getelementptr %frame* %args, 
i32 0, i32 0 + call void @Foo_ctor(%Foo* %a) +; CHECK: subl $4, %esp +; CHECK: calll _Foo_ctor +; CHECK: addl $4, %esp + call void @f(%frame* inalloca %args) +; CHECK: calll _f + ret void +} Index: llvm/trunk/test/CodeGen/X86/inalloca-invoke.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-invoke.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-invoke.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Iter = type { i32, i32, i32 } + +%frame.reverse = type { %Iter, %Iter } + +declare void @llvm.stackrestore(i8*) +declare i8* @llvm.stacksave() +declare void @begin(%Iter* sret) +declare void @plus(%Iter* sret, %Iter*, i32) +declare void @reverse(%frame.reverse* inalloca align 4) + +define i32 @main() { + %temp.lvalue = alloca %Iter + br label %blah + +blah: + %inalloca.save = call i8* @llvm.stacksave() + %rev_args = alloca %frame.reverse, inalloca, align 4 + %beg = getelementptr %frame.reverse* %rev_args, i32 0, i32 0 + %end = getelementptr %frame.reverse* %rev_args, i32 0, i32 1 + +; CHECK: calll __chkstk +; CHECK: movl %[[beg:[^,]*]], %esp +; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]] + + call void @begin(%Iter* sret %temp.lvalue) +; CHECK: calll _begin + + invoke void @plus(%Iter* sret %end, %Iter* %temp.lvalue, i32 4) + to label %invoke.cont unwind label %lpad + +; Uses end as sret param. 
+; CHECK: movl %[[end]], (%esp) +; CHECK: calll _plus + +invoke.cont: + call void @begin(%Iter* sret %beg) + +; CHECK: movl %[[beg]], +; CHECK: calll _begin + + invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args) + to label %invoke.cont5 unwind label %lpad + +invoke.cont5: ; preds = %invoke.cont + call void @llvm.stackrestore(i8* %inalloca.save) + ret i32 0 + +lpad: ; preds = %invoke.cont, %entry + %lp = landingpad { i8*, i32 } personality i8* null + cleanup + unreachable +} Index: llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll +++ llvm/trunk/test/CodeGen/X86/inalloca-stdcall.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Foo = type { i32, i32 } + +declare x86_stdcallcc void @f(%Foo* inalloca %a) +declare x86_stdcallcc void @i(i32 %a) + +define void @g() { + %b = alloca %Foo, inalloca +; CHECK: movl $8, %eax +; CHECK: calll __chkstk +; CHECK: movl %[[REG:[^,]*]], %esp + %f1 = getelementptr %Foo* %b, i32 0, i32 0 + %f2 = getelementptr %Foo* %b, i32 0, i32 1 + store i32 13, i32* %f1 + store i32 42, i32* %f2 +; CHECK: movl $13, (%[[REG]]) +; CHECK: movl $42, 4(%[[REG]]) + call x86_stdcallcc void @f(%Foo* inalloca %b) +; CHECK: calll _f@8 +; CHECK-NOT: %esp +; CHECK: subl $4, %esp +; CHECK: calll _i@4 + call x86_stdcallcc void @i(i32 0) + ret void +} Index: llvm/trunk/test/CodeGen/X86/inalloca.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inalloca.ll +++ llvm/trunk/test/CodeGen/X86/inalloca.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Foo = type { i32, i32 } + +declare void @f(%Foo* inalloca %b) + +define void @a() { +; CHECK-LABEL: _a: +entry: + %b = alloca %Foo, inalloca +; CHECK: movl $8, %eax +; CHECK: calll __chkstk +; CHECK: movl %[[REG:[^,]*]], %esp + %f1 = getelementptr %Foo* %b, i32 0, i32 0 + %f2 = 
getelementptr %Foo* %b, i32 0, i32 1 + store i32 13, i32* %f1 + store i32 42, i32* %f2 +; CHECK: movl $13, (%[[REG]]) +; CHECK: movl $42, 4(%[[REG]]) + call void @f(%Foo* inalloca %b) +; CHECK: calll _f + ret void +} + +declare void @inreg_with_inalloca(i32 inreg %a, %Foo* inalloca %b) + +define void @b() { +; CHECK-LABEL: _b: +entry: + %b = alloca %Foo, inalloca +; CHECK: movl $8, %eax +; CHECK: calll __chkstk +; CHECK: movl %[[REG:[^,]*]], %esp + %f1 = getelementptr %Foo* %b, i32 0, i32 0 + %f2 = getelementptr %Foo* %b, i32 0, i32 1 + store i32 13, i32* %f1 + store i32 42, i32* %f2 +; CHECK: movl $13, (%[[REG]]) +; CHECK: movl $42, 4(%[[REG]]) + call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b) +; CHECK: movl $1, %eax +; CHECK: calll _inreg_with_inalloca + ret void +} + +declare x86_thiscallcc void @thiscall_with_inalloca(i8* %a, %Foo* inalloca %b) + +define void @c() { +; CHECK-LABEL: _c: +entry: + %b = alloca %Foo, inalloca +; CHECK: movl $8, %eax +; CHECK: calll __chkstk +; CHECK: movl %[[REG:[^,]*]], %esp + %f1 = getelementptr %Foo* %b, i32 0, i32 0 + %f2 = getelementptr %Foo* %b, i32 0, i32 1 + store i32 13, i32* %f1 + store i32 42, i32* %f2 +; CHECK: movl $13, (%[[REG]]) +; CHECK: movl $42, 4(%[[REG]]) + call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b) +; CHECK: xorl %ecx, %ecx +; CHECK: calll _thiscall_with_inalloca + ret void +}