diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -797,6 +797,8 @@
   /// pointer, and a SRCVALUE.
   VAEND, VASTART,
 
+  CALL_SETUP, CALL_ALLOC,
+
   /// SRCVALUE - This is a node type that holds a Value* that is used to
   /// make reference to a value in the LLVM IR.
   SRCVALUE,
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -35,6 +35,7 @@
     unsigned IsReturned : 1;     ///< Always returned
     unsigned IsSplit : 1;
     unsigned IsInAlloca : 1;     ///< Passed with inalloca
+    unsigned IsPreallocated : 1; ///< ByVal without the copy
     unsigned IsSplitEnd : 1;     ///< Last part of a split
     unsigned IsSwiftSelf : 1;    ///< Swift self parameter
     unsigned IsSwiftError : 1;   ///< Swift error parameter
@@ -56,9 +57,9 @@
   public:
     ArgFlagsTy()
         : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
-          IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
-          IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
-          IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
+          IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+          IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
+          IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
           IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
           IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
           PointerAddrSpace(0) {
@@ -83,6 +84,9 @@
     bool isInAlloca() const { return IsInAlloca; }
     void setInAlloca() { IsInAlloca = 1; }
 
+    bool isPreallocated() const { return IsPreallocated; }
+    void setPreallocated() { IsPreallocated = 1; }
+
     bool isSwiftSelf() const { return IsSwiftSelf; }
     void setSwiftSelf() { IsSwiftSelf = 1; }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -273,17 +273,20 @@
     bool IsNest : 1;
     bool IsByVal : 1;
     bool IsInAlloca : 1;
+    bool IsPreallocated : 1;
     bool IsReturned : 1;
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     bool IsCFGuardTarget : 1;
     MaybeAlign Alignment = None;
     Type *ByValType = nullptr;
+    Type *PreallocatedType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
-          IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
-          IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
+          IsNest(false), IsByVal(false), IsInAlloca(false),
+          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
+          IsSwiftError(false), IsCFGuardTarget(false) {}
 
     void setAttributes(const CallBase *Call, unsigned ArgIdx);
   };
@@ -3578,6 +3581,7 @@
     bool IsReturnValueUsed : 1;
     bool IsConvergent : 1;
     bool IsPatchPoint : 1;
+    bool IsPreallocated : 1;
 
     // IsTailCall should be modified by implementations of
    // TargetLowering::LowerCall that perform tail call conversions.
@@ -3601,7 +3605,7 @@
     CallLoweringInfo(SelectionDAG &DAG)
         : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
           DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
-          IsPatchPoint(false), DAG(DAG) {}
+          IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
 
     CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
       DL = dl;
@@ -3707,6 +3711,11 @@
       return *this;
     }
 
+    CallLoweringInfo &setIsPreallocated(bool Value = true) {
+      IsPreallocated = Value;
+      return *this;
+    }
+
     CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
       IsPostTypeLegalization = Value;
       return *this;
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -626,6 +626,9 @@
   /// Return the byval type for the specified function parameter.
   Type *getParamByValType(unsigned ArgNo) const;
 
+  /// Return the preallocated type for the specified function parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   MaybeAlign getStackAlignment(unsigned Index) const;
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1606,6 +1606,12 @@
     return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
   }
 
+  /// Extract the preallocated type for a call or parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const {
+    Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
+    return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -127,6 +127,9 @@
 /// additionally expand this pseudo after register allocation.
 HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
 
+HANDLE_TARGET_OPCODE(CALL_SETUP)
+HANDLE_TARGET_OPCODE(CALL_ALLOC)
+
 /// Call instruction with associated vm state for deoptimization and list
 /// of live pointers for relocation by the garbage collector. It is
 /// intended to support garbage collection with fully precise relocating
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1173,6 +1173,18 @@
   let hasSideEffects = 0;
   bit isPseudo = 1;
 }
+def CALL_SETUP : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$a);
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
+def CALL_ALLOC : StandardPseudoInstruction {
+  let OutOperandList = (outs ptr_rc:$loc);
+  let InOperandList = (ins i32imm:$a, i32imm:$b);
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
 def LOCAL_ESCAPE : StandardPseudoInstruction {
   // This instruction is really just a label. It has to be part of the chain so
   // that it doesn't get dropped from the DAG, but it produces nothing and has
diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td
--- a/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/llvm/include/llvm/Target/TargetCallingConv.td
@@ -41,6 +41,11 @@
 class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
 }
 
+/// CCIfPreallocated - If the current argument has the preallocated parameter
+/// attribute, apply Action A.
+class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
+}
+
 /// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
 /// apply Action A.
 class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -96,10 +96,12 @@
     Flags.setSwiftError();
   if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
     Flags.setByVal();
+  if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
+    Flags.setPreallocated();
   if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
     Flags.setInAlloca();
 
-  if (Flags.isByVal() || Flags.isInAlloca()) {
+  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
 
     auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1205,6 +1205,8 @@
       Flags.setCFGuardTarget();
     if (Arg.IsByVal)
       Flags.setByVal();
+    if (Arg.IsPreallocated)
+      Flags.setPreallocated();
    if (Arg.IsInAlloca) {
       Flags.setInAlloca();
       // Set the byval flag for CCAssignFn callbacks that don't know about
@@ -1214,7 +1216,7 @@
       // the various CC lowering callbacks.
       Flags.setByVal();
     }
-    if (Arg.IsByVal || Arg.IsInAlloca) {
+    if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
       unsigned FrameSize =
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1873,9 +1873,6 @@
 }
 
 SDValue SelectionDAG::getSrcValue(const Value *V) {
-  assert((!V || V->getType()->isPointerTy()) &&
-         "SrcValue is not a pointer?");
-
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
   ID.AddPointer(V);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5792,6 +5792,45 @@
     updateDAGForMaybeTailCall(MC);
     return;
   }
+  case Intrinsic::call_preallocated_setup:
+  case Intrinsic::call_preallocated_arg: {
+    const Value *CallSetup =
+        Intrinsic == Intrinsic::call_preallocated_setup ? &I : I.getOperand(0);
+    assert(cast<CallBase>(CallSetup)->getCalledFunction()->getIntrinsicID() ==
+               Intrinsic::call_preallocated_setup &&
+           "expected call_preallocated_setup Value");
+    const CallBase *Call = nullptr;
+    for (auto U : CallSetup->users()) {
+      auto *UseCall = cast<CallBase>(U);
+      const Function *Fn = UseCall->getCalledFunction();
+      if (Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+        Call = UseCall;
+        break;
+      }
+    }
+    if (Call) {
+      SDValue SrcValue = DAG.getSrcValue(Call);
+      if (Intrinsic == Intrinsic::call_preallocated_setup) {
+        SDValue Res =
+            DAG.getNode(ISD::CALL_SETUP, sdl, MVT::Other, getRoot(), SrcValue);
+        setValue(&I, Res);
+        DAG.setRoot(Res);
+      } else {
+        SDValue Ops[3];
+        Ops[0] = getRoot();
+        Ops[1] = SrcValue;
+        Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)),
+                                       sdl, MVT::i32); // arg index
+        SDValue Res = DAG.getNode(
+            ISD::CALL_ALLOC, sdl,
+            DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other),
+            Ops);
+        setValue(&I, Res);
+        DAG.setRoot(Res.getValue(1));
+      }
+    }
+    return;
+  }
   case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare: {
     const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -7112,7 +7151,9 @@
       .setChain(getRoot())
       .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
       .setTailCall(isTailCall)
-      .setConvergent(CB.isConvergent());
+      .setConvergent(CB.isConvergent())
+      .setIsPreallocated(
+          CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
 
   if (Result.first.getNode()) {
@@ -7638,9 +7679,9 @@
   // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
   // have to do anything here to lower funclet bundles.
   // CFGuardTarget bundles are lowered in LowerCallTo.
-  assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
-                                        LLVMContext::OB_funclet,
-                                        LLVMContext::OB_cfguardtarget}) &&
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+              LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
          "Cannot lower calls with arbitrary operand bundles!");
 
   SDValue Callee = getValue(I.getCalledValue());
@@ -8601,7 +8642,9 @@
       .setChain(getRoot())
       .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
       .setDiscardResult(Call->use_empty())
-      .setIsPatchPoint(IsPatchPoint);
+      .setIsPatchPoint(IsPatchPoint)
+      .setIsPreallocated(
+          Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
 }
 
 /// Add a stack map intrinsic call's live variable operands to a stackmap
@@ -9121,6 +9164,8 @@
         Flags.setCFGuardTarget();
       if (Args[i].IsByVal)
         Flags.setByVal();
+      if (Args[i].IsPreallocated)
+        Flags.setPreallocated();
      if (Args[i].IsInAlloca) {
         Flags.setInAlloca();
         // Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9130,7 +9175,7 @@
        // in the various CC lowering callbacks.
        Flags.setByVal();
       }
-      if (Args[i].IsByVal || Args[i].IsInAlloca) {
+      if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
 
@@ -9328,13 +9373,14 @@
 
 void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
                                                      unsigned Reg) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   SDValue Op = getNonRegisterValue(V);
   assert((Op.getOpcode() != ISD::CopyFromReg ||
           cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
          "Copy from a reg to the same reg!");
   assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // If this is an InlineAsm we have to match the registers required, not the
   // notional registers required by the type.
@@ -9629,12 +9675,21 @@
         // in the various CC lowering callbacks.
         Flags.setByVal();
       }
+      if (Arg.hasAttribute(Attribute::Preallocated)) {
+        Flags.setPreallocated();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // inalloca. This way we can know how many bytes we should've allocated
+        // and how many bytes a callee cleanup function will pop. If we port
+        // inalloca to more targets, we'll have to add custom inalloca handling
+        // in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
       if (F.getCallingConv() == CallingConv::X86_INTR) {
         // IA Interrupt passes frame (1st parameter) by value in the stack.
         if (ArgNo == 0)
           Flags.setByVal();
       }
-      if (Flags.isByVal() || Flags.isInAlloca()) {
+      if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
         Type *ElementTy = Arg.getParamByValType();
 
         // For ByVal, size and alignment should be passed from FE. BE will
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -393,6 +393,10 @@
   case ISD::GC_TRANSITION_END:          return "gc_transition.end";
   case ISD::GET_DYNAMIC_AREA_OFFSET:    return "get.dynamic.area.offset";
   case ISD::FREEZE:                     return "freeze";
+  case ISD::CALL_SETUP:
+    return "call_setup";
+  case ISD::CALL_ALLOC:
+    return "call_alloc";
 
   // Bit manipulation
   case ISD::ABS:                        return "abs";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -110,14 +110,18 @@
   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlign(ArgIdx);
   ByValType = nullptr;
-  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+  if (IsByVal)
     ByValType = Call->getParamByValType(ArgIdx);
+  PreallocatedType = nullptr;
+  if (IsPreallocated)
+    PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1434,6 +1434,10 @@
   return getAttributes(Index+FirstArgIndex).getByValType();
 }
 
+Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
+  return getAttributes(Index + FirstArgIndex).getPreallocatedType();
+}
+
 MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -789,8 +789,9 @@
 /// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
 /// values are spilled on the stack.
 def CC_X86_32_Common : CallingConv<[
-  // Handles byval parameters.
+  // Handles byval/preallocated parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
+  CCIfPreallocated<CCPassByVal<4, 4>>,
 
   // The first 3 float or double arguments, if marked 'inreg' and if the call
   // is not a vararg call and if SSE2 is available, are passed in SSE registers.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -57,7 +57,8 @@
 
 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   return !MF.getFrameInfo().hasVarSizedObjects() &&
-         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+         !MF.getInfo<X86MachineFunctionInfo>()->hasCallSetup();
 }
 
 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -67,6 +68,7 @@
 bool X86FrameLowering::canSimplifyCallFramePseudos(
     const MachineFunction &MF) const {
   return hasReservedCallFrame(MF) ||
+         MF.getInfo<X86MachineFunctionInfo>()->hasCallSetup() ||
         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
 }
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5630,6 +5630,39 @@
     CurDAG->RemoveDeadNode(Node);
     return;
   }
+  case ISD::CALL_SETUP: {
+    auto MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->CallSetupIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    MachineSDNode *New = CurDAG->getMachineNode(TargetOpcode::CALL_SETUP, dl,
+                                                MVT::Other, CallIdValue, Chain);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
+  case ISD::CALL_ALLOC: {
+    auto MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->CallSetupIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    SDValue ArgIndex = Node->getOperand(2);
+    SDValue Ops[3];
+    Ops[0] = CallIdValue;
+    Ops[1] = ArgIndex;
+    Ops[2] = Chain;
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::CALL_ALLOC, dl,
+        CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
+                          MVT::Other),
+        Ops);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
+    ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
   }
 
   SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3893,11 +3893,28 @@
     if (ArgLocs.back().getLocMemOffset() != 0)
       report_fatal_error("any parameter with the inalloca attribute must be "
                         "the only memory argument");
+  } else if (CLI.IsPreallocated) {
+    if (!ArgLocs.back().isMemLoc()) {
+      report_fatal_error("cannot use preallocated attribute on a register "
+                         "parameter");
+    }
+    SmallVector<size_t, 4> PreallocatedOffsets;
+    for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+      if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+        PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+      }
+    }
+    auto MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    size_t CallSetupId = MFI->CallSetupIdForCallSite(CLI.CB);
+    MFI->SetCallSetupStackSize(CallSetupId, NumBytes);
+    MFI->SetCallSetupArgOffsets(CallSetupId, PreallocatedOffsets);
+    NumBytesToPush = 0;
   }
 
-  if (!IsSibcall && !IsMustTail)
+  if (!IsSibcall && !IsMustTail) {
     Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
                                  NumBytes - NumBytesToPush, dl);
+  }
 
   SDValue RetAddrFrIdx;
   // Load return address for tail calls.
@@ -3920,9 +3937,9 @@
   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
        ++I, ++OutIndex) {
     assert(OutIndex < Outs.size() && "Invalid Out index");
-    // Skip inalloca arguments, they have already been written.
+    // Skip inalloca/preallocated arguments, they have already been written.
     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
-    if (Flags.isInAlloca())
+    if (Flags.isInAlloca() || Flags.isPreallocated())
       continue;
 
     CCValAssign &VA = ArgLocs[I];
@@ -4110,8 +4127,8 @@
       assert(VA.isMemLoc());
       SDValue Arg = OutVals[OutsIndex];
       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
-      // Skip inalloca arguments. They don't require any work.
-      if (Flags.isInAlloca())
+      // Skip inalloca/preallocated arguments. They don't require any work.
+      if (Flags.isInAlloca() || Flags.isPreallocated())
         continue;
       // Create frame index.
       int32_t Offset = VA.getLocMemOffset()+FPDiff;
@@ -33061,6 +33078,38 @@
     BB->addLiveIn(BasePtr);
     return BB;
   }
+  case TargetOpcode::CALL_SETUP: {
+    assert(Subtarget.is32Bit() && "callsetup only used in 32-bit");
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    MFI->setHasCallSetup(true);
+    // Offsets from the stack pointer may be wrong because we have adjusted it
+    MFI->setForceFramePointer(true);
+    int64_t CallSetupId = MI.getOperand(0).getImm();
+    size_t StackAdjustment = MFI->GetCallSetupStackSize(CallSetupId);
+    assert(StackAdjustment != 0 && "0 stack adjustment");
+    LLVM_DEBUG(dbgs() << "CALL_SETUP stack adjustment " << StackAdjustment
+                      << "\n");
+    BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+        .addReg(X86::ESP)
+        .addImm(StackAdjustment);
+    MI.eraseFromParent();
+    return BB;
+  }
+  case TargetOpcode::CALL_ALLOC: {
+    assert(Subtarget.is32Bit() && "callsetup only used in 32-bit");
+    int64_t CallSetupId = MI.getOperand(1).getImm();
+    int64_t ArgIdx = MI.getOperand(2).getImm();
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    size_t ArgOffset = MFI->GetCallSetupArgOffsets(CallSetupId)[ArgIdx];
+    LLVM_DEBUG(dbgs() << "CALL_ALLOC arg index " << ArgIdx << ", arg offset "
+                      << ArgOffset << "\n");
+    // copy stack pointer + offset into reg
+    addRegOffset(
+        BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
+        X86::ESP, false, ArgOffset);
+    MI.eraseFromParent();
+    return BB;
+  }
   }
 }
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -103,6 +103,14 @@
 
   /// True if this function has WIN_ALLOCA instructions.
  bool HasWinAlloca = false;
 
+  /// True if this function has any preallocated call sites (CALL_SETUP).
+  bool HasCallSetup = false;
+
+  size_t CallSetupNextId = 0;
+  ValueMap<const Value *, size_t> CallSetupIds;
+  DenseMap<size_t, size_t> CallSetupStackSizes;
+  DenseMap<size_t, SmallVector<size_t, 4>> CallSetupArgOffsets;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -184,6 +192,40 @@
 
   bool hasWinAlloca() const { return HasWinAlloca; }
   void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+
+  bool hasCallSetup() const { return HasCallSetup; }
+  void setHasCallSetup(bool v) { HasCallSetup = v; }
+
+  size_t CallSetupIdForCallSite(const Value *CS) {
+    auto Id = CallSetupIds.find(CS);
+    if (Id == CallSetupIds.end()) {
+      size_t NewId = CallSetupNextId++;
+      CallSetupIds.insert({CS, NewId});
+      return NewId;
+    } else {
+      return Id->second;
+    }
+  }
+
+  void SetCallSetupStackSize(size_t Id, size_t StackSize) {
+    CallSetupStackSizes[Id] = StackSize;
+  }
+
+  size_t GetCallSetupStackSize(const size_t Id) {
+    assert(CallSetupStackSizes.find(Id) != CallSetupStackSizes.end() &&
+           "stack size not set");
+    return CallSetupStackSizes[Id];
+  }
+
+  void SetCallSetupArgOffsets(size_t Id, SmallVector<size_t, 4> AO) {
+    CallSetupArgOffsets[Id] = AO;
+  }
+
+  const SmallVector<size_t, 4> &GetCallSetupArgOffsets(const size_t Id) {
+    assert(CallSetupArgOffsets.find(Id) != CallSetupArgOffsets.end() &&
+           "arg offsets not set");
+    return CallSetupArgOffsets[Id];
+  }
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -627,18 +627,22 @@
 }
 
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  if (!EnableBasePointer)
-    return false;
-
-  // When we need stack realignment, we can't address the stack from the frame
-  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
-  // can't address variables from the stack pointer. MS inline asm can
-  // reference locals while also adjusting the stack pointer. When we can't
-  // use both the SP and the FP, we need a separate base pointer register.
-  bool CantUseFP = needsStackRealignment(MF);
-  return CantUseFP && CantUseSP(MFI);
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  if (X86FI->hasCallSetup())
+    return true;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (!EnableBasePointer)
+    return false;
+
+  // When we need stack realignment, we can't address the stack from the frame
+  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
+  // can't address variables from the stack pointer. MS inline asm can
+  // reference locals while also adjusting the stack pointer. When we can't
+  // use both the SP and the FP, we need a separate base pointer register.
+  bool CantUseFP = needsStackRealignment(MF);
+  return CantUseFP && CantUseSP(MFI);
 }
 
 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
diff --git a/llvm/test/CodeGen/X86/preallocated.ll b/llvm/test/CodeGen/X86/preallocated.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @f(%Foo* preallocated(%Foo))
+declare void @g(%Foo* preallocated(%Foo), i32)
+declare void @h(%Foo* sret, %Foo* preallocated(%Foo))
+declare void @init(%Foo*)
+
+define void @preallocated() {
+; CHECK-LABEL: _preallocated:
+entry:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _f
+  call void @f(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @one_preallocated_one_normal() {
+; CHECK-LABEL: _one_preallocated_one_normal:
+entry:
+; CHECK: subl $12, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: movl $2, 8(%esp)
+; CHECK: calll _g
+  call void @g(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @nested() {
+; CHECK-LABEL: _nested:
+entry:
+  %tmp = alloca %Foo
+
+  %t1 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0)
+  %b1 = bitcast i8* %a1 to %Foo*
+; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
+
+  %t2 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0)
+; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
+  %b2 = bitcast i8* %a2 to %Foo*
+
+  call void @init(%Foo* %b2)
+; CHECK: pushl [[REGISTER2]]
+; CHECK: calll _init
+
+  call void @h(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _h
+  call void @h(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _h
+  ret void
+}
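
Reviewer note, not part of the patch: as an illustration of what the new CALL_SETUP/CALL_ALLOC lowering is expected to produce, the CHECK lines of @preallocated above sketch roughly the following i686 sequence. This is assembled from those CHECK patterns only, not actual llc output; the register %eax is a placeholder, and unrelated stack cleanup after the call to @init is omitted.

  subl  $8, %esp        # CALL_SETUP: reserve the argument area for the preallocated %Foo (8 bytes)
  leal  (%esp), %eax    # CALL_ALLOC: pointer to preallocated argument 0 inside that area
  pushl %eax            # pass that pointer to @init, which constructs the %Foo in place
  calll _init
  calll _f              # no further subl/pushl: the preallocated argument is already in place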