Index: include/llvm/Target/TargetCallingConv.h =================================================================== --- include/llvm/Target/TargetCallingConv.h +++ include/llvm/Target/TargetCallingConv.h @@ -42,6 +42,8 @@ static const uint64_t ByValAlignOffs = 7; static const uint64_t Split = 1ULL<<11; static const uint64_t SplitOffs = 11; + static const uint64_t InAlloca = 1ULL<<12; ///< Passed in alloca + static const uint64_t InAllocaOffs = 12; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size @@ -68,6 +70,9 @@ bool isByVal() const { return Flags & ByVal; } void setByVal() { Flags |= One << ByValOffs; } + bool isInAlloca() const { return Flags & InAlloca; } + void setInAlloca() { Flags |= One << InAllocaOffs; } + bool isNest() const { return Flags & Nest; } void setNest() { Flags |= One << NestOffs; } Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -24,6 +24,7 @@ #define LLVM_TARGET_TARGETLOWERING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -604,8 +605,9 @@ return getValueType(Ty, AllowUnknown).getSimpleVT(); } - /// Return the desired alignment for ByVal aggregate function arguments in the - /// caller parameter area. This is the actual alignment, not its logarithm. + /// Return the desired alignment for ByVal or InAlloca aggregate function + /// arguments in the caller parameter area. This is the actual alignment, not + /// its logarithm. virtual unsigned getByValTypeAlignment(Type *Ty) const; /// Return the type of registers that this ValueType will eventually require. 
@@ -1935,12 +1937,13 @@ bool isSRet : 1; bool isNest : 1; bool isByVal : 1; + bool isInAlloca : 1; bool isReturned : 1; uint16_t Alignment; ArgListEntry() : isSExt(false), isZExt(false), isInReg(false), - isSRet(false), isNest(false), isByVal(false), isReturned(false), - Alignment(0) { } + isSRet(false), isNest(false), isByVal(false), isInAlloca(false), + isReturned(false), Alignment(0) { } void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx); }; @@ -2021,6 +2024,15 @@ llvm_unreachable("Not Implemented"); } + void AnalyzeCallOutArgs(CallLoweringInfo &CLI, bool EmitNodes) const; + + /// Analyze the call to find the argument locations without adding any + /// SDNodes to the DAG. + virtual void AnalyzeCallArgs(CallLoweringInfo & /*CLI*/, + CCState & /*CCInfo*/) const { + llvm_unreachable("Not Implemented"); + } + /// Target-specific cleanup for formal ByVal parameters. virtual void HandleByVal(CCState *, unsigned &, unsigned) const {} Index: lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp =================================================================== --- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -74,7 +74,9 @@ // them. 
Function::const_iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + if (AI->isUsedWithInAlloca()) + continue; if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); @@ -88,6 +90,7 @@ StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } + } for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -22,6 +22,7 @@ #include "llvm/IR/Constants.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLowering.h" #include <vector> namespace llvm { @@ -69,7 +70,6 @@ class SwitchInst; class DataLayout; class TargetLibraryInfo; -class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; @@ -617,6 +617,20 @@ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); + + /// \brief Demote an unlowerable return type to an implicit sret parameter. + /// Updates CLI.Args and CLI.RetTy. + /// + /// \param [opt,out] DemoteStackSlot - If non-null, a stack object is created + /// and returned in this outparam. + bool MaybeLowerResultAsSRet(TargetLowering::CallLoweringInfo &CLI, + SDValue *DemoteStackSlot); + + /// \brief Fill out CLI.Args with argument information. + /// + /// \param EmitNodes - If true, register copies for arguments are emitted.
+ void AddArgsToCLI(TargetLowering::CallLoweringInfo &CLI, bool EmitNodes); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); @@ -762,6 +776,7 @@ void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); void visitPatchpoint(const CallInst &I); + void visitInAlloca(const AllocaInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -970,7 +970,9 @@ visit(I.getOpcode(), I); - if (!isa<TerminatorInst>(&I) && !HasTailCall) + // TODO: Try to localize inalloca values and remove this special case. + if (!isa<TerminatorInst>(&I) && !HasTailCall && + (!isa<AllocaInst>(&I) || !cast<AllocaInst>(&I)->isUsedWithInAlloca())) CopyToExportRegsIfNeeded(&I); CurInst = NULL; @@ -3336,6 +3338,11 @@ if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. + if (I.isUsedWithInAlloca()) { + visitInAlloca(I); + return; + } + Type *Ty = I.getAllocatedType(); const TargetLowering *TLI = TM.getTargetLowering(); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); @@ -3383,6 +3390,79 @@ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, &I); } +void SelectionDAGBuilder::visitInAlloca(const AllocaInst &AI) { + // Find the call site that uses us, and figure out if we are last inalloca + // argument to the callsite, which implies that we are the first alloca to + // execute. + ImmutableCallSite CS = AI.getInAllocaCallSite(); + assert(CS); + for (int I = CS.arg_size() - 1; I >= 0; --I) { + const Value *Arg = CS.getArgument(I); + if (Arg == &AI) + break; // We were the last inalloca alloca, do the adjustment here. + if (CS.paramHasAttr(1 + I, Attribute::InAlloca)) { + return; // We weren't the last alloca.
We'll be lowered elsewhere. + } + } + + // Analyze the call site to figure out how much stack memory to allocate. + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + TargetLowering::ArgListTy Args; + SmallVector<CCValAssign, 16> ArgLocs; + MachineFunction &MF = DAG.getMachineFunction(); + TargetLowering::CallLoweringInfo CLI( + /*Chain=*/SDValue(), RetTy, FTy, /*isTailCall=*/false, + /*Callee=*/SDValue(), Args, DAG, SDLoc(), CS); + bool AddedSRet = !MaybeLowerResultAsSRet(CLI, 0); + AddArgsToCLI(CLI, /*EmitNodes=*/false); + + const TargetLowering *TLI = TM.getTargetLowering(); + CCState CCInfo(CS.getCallingConv(), FTy->isVarArg(), MF, TM, ArgLocs, + *DAG.getContext()); + TLI->AnalyzeCallArgs(CLI, CCInfo); + + // Dynamically allocate space for the stack frame. + // TODO: Can we support args aligned to greater than the stack alignment? + unsigned Align = 0; + SDValue AllocSize = DAG.getIntPtrConstant(CCInfo.getNextStackOffset()); + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + EVT IntPtr = TLI->getPointerTy(); + SDVTList VTs = DAG.getVTList(IntPtr, MVT::Other); + SDValue ArgMem = + DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops, 3); + DAG.setRoot(ArgMem.getValue(1)); + + // We need to tell frame information that we allocated a "variable" sized + // object in order to force the use of a base pointer. + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, + &AI); + + // Compute the address of the inalloca args in the frame and create virtual + // registers for them.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + unsigned ValNo = ArgLocs[I].getValNo(); + unsigned ArgNo = CLI.Outs[ValNo].OrigArgIndex - AddedSRet; + if (CS.paramHasAttr(1 + ArgNo, Attribute::InAlloca)) { + if (!ArgLocs[I].isMemLoc()) + report_fatal_error("inalloca used for register parameter"); + + SDValue AllocaAddr = + DAG.getNode(ISD::ADD, getCurSDLoc(), IntPtr, ArgMem, + DAG.getConstant(ArgLocs[I].getLocMemOffset(), IntPtr)); + + // Define exported virtual registers for all inalloca values up front. + // TODO: Try to localize them so we get better isel. + const Value *Alloca = CS.getArgument(ArgNo); + assert(isa<AllocaInst>(Alloca) && "inalloca with non-alloca?"); + setValue(Alloca, AllocaAddr); + CopyToExportRegsIfNeeded(Alloca); + } + assert(I + 1 == E || ArgLocs[I + 1].getValNo() != ArgNo); + } +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -5352,17 +5432,14 @@ } } -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, - bool isTailCall, - MachineBasicBlock *LandingPad) { +bool SelectionDAGBuilder::MaybeLowerResultAsSRet( + TargetLowering::CallLoweringInfo &CLI, SDValue *DemoteStackSlot) { + ImmutableCallSite &CS = *CLI.CS; PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = 0; - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; + TargetLowering::ArgListTy &Args = CLI.Args; Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion.
@@ -5375,49 +5452,78 @@ FTy->isVarArg(), Outs, FTy->getContext()); - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; + if (CanLowerReturn) + return true; - if (!CanLowerReturn) { - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); + // Add an implicit sret parameter as the first argument entry. + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(RetTy); + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(RetTy); + Type *StackSlotPtrType = PointerType::getUnqual(RetTy); + TargetLowering::ArgListEntry SRetEntry; + SRetEntry.Ty = StackSlotPtrType; + SRetEntry.isSRet = true; + SRetEntry.Alignment = Align; + + if (DemoteStackSlot) { MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); + int DemoteStackIdx = + MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + *DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); + SRetEntry.Node = *DemoteStackSlot; } + Args.push_back(SRetEntry); + CLI.RetTy = Type::getVoidTy(FTy->getContext()); + return false; +} + +void SelectionDAGBuilder::AddArgsToCLI(TargetLowering::CallLoweringInfo &CLI, + bool EmitNodes) { + ImmutableCallSite &CS = *CLI.CS; for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; - - // Skip empty types if (V->getType()->isEmptyTy()) - 
continue; - - SDValue ArgNode = getValue(V); - Entry.Node = ArgNode; Entry.Ty = V->getType(); - + continue; // Skip empty types + TargetLowering::ArgListEntry Entry; + Entry.Ty = V->getType(); // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); - Args.push_back(Entry); + + if (EmitNodes) + Entry.Node = getValue(V); + CLI.Args.push_back(Entry); } + const TargetLowering *TLI = TM.getTargetLowering(); + TLI->AnalyzeCallOutArgs(CLI, EmitNodes); +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + const TargetLowering *TLI = TM.getTargetLowering(); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = 0; + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, *TLI)) + isTailCall = false; + if (isTailCall && CS.hasInAllocaArgument()) + isTailCall = false; + + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(SDValue(), RetTy, FTy, isTailCall, + Callee, Args, DAG, getCurSDLoc(), CS); + + SDValue DemoteStackSlot; + bool CanLowerReturn = MaybeLowerResultAsSRet(CLI, &DemoteStackSlot); + AddArgsToCLI(CLI, /*EmitNodes=*/true); + if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo.
- if (isTailCall && !isInTailCallPosition(CS, *TLI)) - isTailCall = false; - - TargetLowering:: - CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurSDLoc(), CS); + CLI.Chain = getRoot(); std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); @@ -5459,7 +5558,7 @@ // The instruction result is the result of loading from the // hidden sret parameter. SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + Type *PtrRetTy = PointerType::getUnqual(RetTy); ComputeValueVTs(*TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); @@ -5467,7 +5566,6 @@ SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; - RetTy = FTy->getReturnType(); ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); unsigned NumValues = RetTys.size(); @@ -5478,9 +5576,11 @@ SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(DemoteStackSlot.getNode()); + SDValue L = DAG.getLoad( + RetTys[i], getCurSDLoc(), Result.second, Add, + MachinePointerInfo::getFixedStack(FI->getIndex(), Offsets[i]), + false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -7033,35 +7133,8 @@ FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } -/// TargetLowering::LowerCallTo - This is the default LowerCallTo -/// implementation, which just calls LowerCall. -/// FIXME: When all targets are -/// migrated to using LowerCall, this hook should be integrated into SDISel. -std::pair<SDValue, SDValue> -TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { - // Handle the incoming return values from the call.
- CLI.Ins.clear(); - SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); - } - } - +void TargetLowering::AnalyzeCallOutArgs(CallLoweringInfo &CLI, + bool EmitNodes) const { // Handle all of the outgoing arguments. CLI.Outs.clear(); CLI.OutVals.clear(); @@ -7073,8 +7146,6 @@ Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); - SDValue Op = SDValue(Args[i].Node.getNode(), - Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); @@ -7087,8 +7158,13 @@ Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setByVal(); // TODO: Remove when isInAlloca() works.
+ Flags.setInAlloca(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7107,7 +7183,6 @@ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); - SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (Args[i].isSExt) ExtendKind = ISD::SIGN_EXTEND; else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; // Conservatively only handle 'returned' on non-vectors for now - if (Args[i].isReturned && !Op.getValueType().isVector()) { - assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && - "unexpected use of 'returned'"); + if (Args[i].isReturned && !VT.isVector()) { + assert(CLI.RetTy == Args[i].Ty && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these @@ -7135,24 +7209,64 @@ Flags.setReturned(); } - getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); + SmallVector<SDValue, 4> Parts; + if (EmitNodes) { + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + Parts.resize(NumParts); + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, + PartVT, CLI.CS ?
CLI.CS->getInstruction() : 0, ExtendKind); + } for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, - i < CLI.NumFixedArgs, - i, j*Parts[j].getValueType().getStoreSize()); + ISD::OutputArg MyFlags(Flags, PartVT, VT, i < CLI.NumFixedArgs, i, + j * PartVT.getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); CLI.Outs.push_back(MyFlags); - CLI.OutVals.push_back(Parts[j]); + if (EmitNodes) { + assert(Parts[j].getValueType() == PartVT); + CLI.OutVals.push_back(Parts[j]); + } } } } +} + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call.
+ CLI.Ins.clear(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, CLI.RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + AnalyzeCallOutArgs(CLI, true); SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); @@ -7307,8 +7421,13 @@ Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setByVal(); // TODO: Remove when isInAlloca() works.
+ Flags.setInAlloca(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -74,6 +74,7 @@ isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); Alignment = CS->getParamAlignment(AttrIdx); } Index: lib/Target/Mangler.cpp =================================================================== --- lib/Target/Mangler.cpp +++ lib/Target/Mangler.cpp @@ -66,8 +66,8 @@ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { Type *Ty = AI->getType(); - // 'Dereference' type in case of byval parameter attribute - if (AI->hasByValAttr()) + // 'Dereference' type in case of byval or inalloca parameter attribute + if (AI->hasByValOrInAllocaAttr()) Ty = cast<PointerType>(Ty)->getElementType(); // Size should be aligned to DWORD boundary ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -1905,6 +1905,10 @@ if (isVarArg && isWin64) return false; + // Don't know about inalloca yet. + if (CS.hasInAllocaArgument()) + return false; + + // Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, TM.Options.GuaranteedTailCallOpt)) Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -1487,8 +1487,8 @@ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); bool IsLP64 = STI.isTarget64BitLP64(); DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; - uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; + int64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; + int64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; I = MBB.erase(I); if (!reseveCallFrame) { // adjcallstackup instruction into a 'sub ESP, <amt>' and the // adjcallstackdown instruction into 'add ESP, <amt>' // TODO: consider using push / pop instead of sub + store / add - if (Amount == 0) + if (Amount == 0 && CalleeAmt == 0) return; // We need to keep the stack aligned properly. To do this, we round the @@ -1517,10 +1517,15 @@ // Factor out the amount the callee already popped.
Amount -= CalleeAmt; - if (Amount) { + if (Amount > 0) { unsigned Opc = getADDriOpcode(IsLP64, Amount); New = BuildMI(MF, DL, TII.get(Opc), StackPtr) .addReg(StackPtr).addImm(Amount); + } else if (Amount < 0) { + Amount = -Amount; + unsigned Opc = getSUBriOpcode(IsLP64, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); } } Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -912,6 +912,8 @@ LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; + virtual void AnalyzeCallArgs(CallLoweringInfo &CLI, CCState &CCInfo) const; + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2501,6 +2501,21 @@ return Chain; } +void X86TargetLowering::AnalyzeCallArgs(TargetLowering::CallLoweringInfo &CLI, + CCState &CCInfo) const { + SelectionDAG &DAG = CLI.DAG; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + + // Allocate shadow area for Win64 + if (Subtarget->isCallingConvWin64(CallConv)) + CCInfo.AllocateStack(32, 8); + + CCInfo.AnalyzeCallOperands(Outs, CC_X86); +} + SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { @@ -2547,12 +2562,7 @@ SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, *DAG.getContext()); - - // Allocate shadow area for Win64 - if (IsWin64) - CCInfo.AllocateStack(32, 8); - - CCInfo.AnalyzeCallOperands(Outs, CC_X86); + AnalyzeCallArgs(CLI, CCInfo); // Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -2578,9 +2588,18 @@ X86Info->setTCReturnAddrDelta(FPDiff); } + unsigned NumBytesToPush = NumBytes; + + // If there were any inalloca arguments, we should've already adjusted the + // stack at the first inalloca alloca. Note that the callee may clear the + // arguments for us, and we will have to undo it. See NumBytesForCalleeToPop + // below. + if (CLI.CS && CLI.CS->hasInAllocaArgument()) + NumBytesToPush = 0; + if (!IsSibcall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), - dl); + Chain = DAG.getCALLSEQ_START( + Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl); SDValue RetAddrFrIdx; // Load return address for tail calls. @@ -2597,10 +2616,14 @@ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + // Skip inalloca arguments, they have already been written. + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (Flags.isInAlloca()) + continue; + CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; bool isByVal = Flags.isByVal(); // Promote the value if needed. @@ -2867,8 +2890,9 @@ SmallVector<SDValue, 8> Ops; if (!IsSibcall && isTailCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NumBytesToPush, true), + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -2907,10 +2931,10 @@ InFlag = Chain.getValue(1); // Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPush; + unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, getTargetMachine().Options.GuaranteedTailCallOpt)) - NumBytesForCalleeToPush = NumBytes; // Callee pops everything + NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) @@ -2918,15 +2942,15 @@ // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. // For MSVC Win32 targets, the caller pops the hidden struct pointer. - NumBytesForCalleeToPush = 4; + NumBytesForCalleeToPop = 4; else - NumBytesForCalleeToPush = 0; // Callee pops nothing. + NumBytesForCalleeToPop = 0; // Callee pops nothing. // Returns a flag for retval copy to use. if (!IsSibcall) { Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPush, + DAG.getIntPtrConstant(NumBytesToPush, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPop, true), InFlag, dl); InFlag = Chain.getValue(1); Index: test/CodeGen/X86/inalloca-ctor.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/inalloca-ctor.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Foo = type { i32, i32 } + +declare void @f(%Foo* inalloca %a, i32 %b, %Foo* inalloca %c) + +declare void @Foo_ctor(%Foo* %this) + +define void @g() { +entry: + %c = alloca %Foo +; CHECK: movl $20, %eax +; CHECK: calll __chkstk +; CHECK: movl %esp, + call void @Foo_ctor(%Foo* %c) +; CHECK: leal 12(%{{.*}}), +; CHECK: subl $4, %esp +; CHECK: calll _Foo_ctor +; CHECK: addl $4, %esp + %a = alloca %Foo + call void @Foo_ctor(%Foo* %a) +; CHECK: subl $4, %esp +; CHECK: calll _Foo_ctor +; CHECK: addl $4, %esp + call void @f(%Foo* inalloca %a, i32 3, %Foo* inalloca %c) +; CHECK: movl $3, 8(%esp) +; CHECK: calll _f + ret 
void +} Index: test/CodeGen/X86/inalloca-stdcall.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/inalloca-stdcall.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +; Make sure we keep the argument stack space live after a callee cleanup call. +; Otherwise we have pointers past the top of the stack in the IR. The plan is +; to write an MI pass to clean up these extraneous stack adjustments later. + +declare x86_stdcallcc void @f(i32* inalloca %a, i32* inalloca %b) +declare x86_stdcallcc void @i(i32 %a) + +define void @g() { + %b = alloca i32 + %a = alloca i32 + store i32 1, i32* %a + store i32 2, i32* %b +; CHECK: movl $1, (%esp) +; CHECK: movl $2, 4(%esp) + call x86_stdcallcc void @f(i32* inalloca %a, i32* inalloca %b) +; CHECK: calll _f@8 +; CHECK: subl $8, %esp +; CHECK: subl $4, %esp +; CHECK: calll _i@4 + call x86_stdcallcc void @i(i32 0) + ret void +} Index: test/CodeGen/X86/inalloca.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/inalloca.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Foo = type { i32, i32 } + +declare void @f(%Foo* inalloca %a, i32 %b, %Foo* inalloca %c) + +define void @g() { +entry: + %c = alloca %Foo + %a = alloca %Foo +; CHECK: movl $20, %eax +; CHECK: calll __chkstk + %f1 = getelementptr %Foo* %a, i32 0, i32 0 + %f2 = getelementptr %Foo* %a, i32 0, i32 1 + store i32 1, i32* %f1 + store i32 2, i32* %f2 +; CHECK: movl $1, (%esp) +; CHECK: movl $2, 4(%esp) + %f3 = getelementptr %Foo* %c, i32 0, i32 0 + %f4 = getelementptr %Foo* %c, i32 0, i32 1 + store i32 4, i32* %f3 + store i32 5, i32* %f4 +; CHECK: movl $4, 12(%esp) +; CHECK: movl $5, 16(%esp) + call void @f(%Foo* inalloca %a, i32 3, %Foo* inalloca %c) +; CHECK: movl $3, 8(%esp) +; CHECK: calll _f + ret void +} + +declare void @i64_with_inalloca(i64 %a, %Foo* inalloca %b) + +define void @foo() { + %b = alloca %Foo 
+; CHECK: movl $16, %eax +; CHECK-NEXT: calll __chkstk + %f1 = getelementptr %Foo* %b, i32 0, i32 0 + store i32 42, i32* %f1 +; CHECK: movl $42, 8(%esp) +; CHECK-NEXT: movl $0, 4(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll _i64_with_inalloca + call void @i64_with_inalloca(i64 0, %Foo* inalloca %b) + ret void +} + +%sret_type = type { i32, i32, i32, i32 } + +declare %sret_type @sret_and_i64_with_inalloca(i64 %a, %Foo* inalloca %b) + +define void @bar() { + %r = alloca %sret_type + %b = alloca %Foo + %f1 = getelementptr %Foo* %b, i32 0, i32 0 + store i32 42, i32* %f1 +; CHECK: movl $42, 12(%esp) + +; The demoted sret param comes first, then the i64. +; CHECK: movl %{{.*}}, (%esp) +; CHECK: movl $0, 8(%esp) +; CHECK: movl $13, 4(%esp) +; CHECK-NEXT: calll _sret_and_i64_with_inalloca + %t = call %sret_type @sret_and_i64_with_inalloca(i64 13, %Foo* inalloca %b) + store %sret_type %t, %sret_type* %r + ret void +} Index: test/CodeGen/X86/inalloca2.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/inalloca2.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s + +%Foo = type { i32, i32 } + +declare void @f(%Foo* inalloca %a, i32 %b, %Foo* inalloca %c) + +define void @g(i1 zeroext %cond) { + %c = alloca %Foo +; CHECK: movl $20, %eax +; CHECK: calll __chkstk + %f4 = getelementptr %Foo* %c, i32 0, i32 0 + %f5 = getelementptr %Foo* %c, i32 0, i32 1 + store i32 4, i32* %f4 + store i32 5, i32* %f5 +; CHECK: movl $4, 12(%esp) +; CHECK: movl $5, 16(%esp) + br i1 %cond, label %bb2, label %exit + +bb2: + %a = alloca %Foo + %f1 = getelementptr %Foo* %a, i32 0, i32 0 + %f2 = getelementptr %Foo* %a, i32 0, i32 1 + store i32 1, i32* %f1 + store i32 2, i32* %f2 +; CHECK: movl $1, (%esp) +; CHECK: movl $2, 4(%esp) + br i1 %cond, label %bb3, label %exit + +bb3: + call void @f(%Foo* inalloca %a, i32 3, %Foo* inalloca %c) +; CHECK: movl $3, 8(%esp) +; CHECK: calll _f + br label %exit + +exit: + ret 
void +}