diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -797,6 +797,13 @@
   /// pointer, and a SRCVALUE.
  VAEND, VASTART,
 
+  // PREALLOCATED_SETUP - This has 2 operands: an input chain and a SRCVALUE
+  // with the preallocated call.
+  PREALLOCATED_SETUP,
+  // PREALLOCATED_ARG - This has 3 operands: an input chain, a SRCVALUE
+  // with the preallocated call, and a constant int.
+  PREALLOCATED_ARG,
+
   /// SRCVALUE - This is a node type that holds a Value* that is used to
   /// make reference to a value in the LLVM IR.
   SRCVALUE,
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -35,6 +35,7 @@
     unsigned IsReturned : 1;     ///< Always returned
     unsigned IsSplit : 1;
     unsigned IsInAlloca : 1;     ///< Passed with inalloca
+    unsigned IsPreallocated : 1; ///< ByVal without the copy
     unsigned IsSplitEnd : 1;     ///< Last part of a split
     unsigned IsSwiftSelf : 1;    ///< Swift self parameter
     unsigned IsSwiftError : 1;   ///< Swift error parameter
@@ -56,9 +57,9 @@
   public:
     ArgFlagsTy()
         : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
-          IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
-          IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
-          IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
+          IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+          IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
+          IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
           IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
           IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
           PointerAddrSpace(0) {
@@ -83,6 +84,9 @@
     bool isInAlloca() const { return IsInAlloca; }
     void setInAlloca() { IsInAlloca = 1; }
 
+    bool isPreallocated() const { return IsPreallocated; }
+    void setPreallocated() { IsPreallocated = 1; }
+
     bool isSwiftSelf() const { return IsSwiftSelf; }
     void setSwiftSelf() { IsSwiftSelf = 1; }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -273,17 +273,20 @@
     bool IsNest : 1;
     bool IsByVal : 1;
     bool IsInAlloca : 1;
+    bool IsPreallocated : 1;
     bool IsReturned : 1;
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     bool IsCFGuardTarget : 1;
     MaybeAlign Alignment = None;
     Type *ByValType = nullptr;
+    Type *PreallocatedType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
-          IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
-          IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
+          IsNest(false), IsByVal(false), IsInAlloca(false),
+          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
+          IsSwiftError(false), IsCFGuardTarget(false) {}
 
     void setAttributes(const CallBase *Call, unsigned ArgIdx);
   };
@@ -3608,6 +3611,7 @@
     bool IsReturnValueUsed : 1;
     bool IsConvergent : 1;
     bool IsPatchPoint : 1;
+    bool IsPreallocated : 1;
 
     // IsTailCall should be modified by implementations of
     // TargetLowering::LowerCall that perform tail call conversions.
@@ -3631,7 +3635,7 @@
     CallLoweringInfo(SelectionDAG &DAG)
         : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
           DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
-          IsPatchPoint(false), DAG(DAG) {}
+          IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
 
     CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
       DL = dl;
@@ -3737,6 +3741,11 @@
       return *this;
     }
 
+    CallLoweringInfo &setIsPreallocated(bool Value = true) {
+      IsPreallocated = Value;
+      return *this;
+    }
+
     CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
       IsPostTypeLegalization = Value;
       return *this;
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -71,9 +71,9 @@
   /// Return true if this argument has the swifterror attribute.
   bool hasSwiftErrorAttr() const;
 
-  /// Return true if this argument has the byval attribute or inalloca
+  /// Return true if this argument has the byval, inalloca, or preallocated
   /// attribute. These attributes represent arguments being passed by value.
-  bool hasByValOrInAllocaAttr() const;
+  bool isPassPointeeByValue() const;
 
   /// If this is a byval or inalloca argument, return its alignment.
   /// FIXME: Remove this function once transition to Align is over.
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -626,6 +626,9 @@
   /// Return the byval type for the specified function parameter.
   Type *getParamByValType(unsigned ArgNo) const;
 
+  /// Return the preallocated type for the specified function parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   MaybeAlign getStackAlignment(unsigned Index) const;
 
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1602,6 +1602,12 @@
     return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
   }
 
+  /// Extract the preallocated type for a call or parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const {
+    Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
+    return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -127,6 +127,12 @@
 /// additionally expand this pseudo after register allocation.
 HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
 
+/// These are used to support call sites that must have the stack adjusted
+/// before the call (e.g. to initialize an argument passed by value).
+/// See llvm.call.preallocated.{setup,arg} in the LangRef for more details.
+HANDLE_TARGET_OPCODE(PREALLOCATED_SETUP)
+HANDLE_TARGET_OPCODE(PREALLOCATED_ARG)
+
 /// Call instruction with associated vm state for deoptimization and list
 /// of live pointers for relocation by the garbage collector. It is
 /// intended to support garbage collection with fully precise relocating
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1173,6 +1173,18 @@
   let hasSideEffects = 0;
   bit isPseudo = 1;
 }
+def PREALLOCATED_SETUP : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$a);
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
+def PREALLOCATED_ARG : StandardPseudoInstruction {
+  let OutOperandList = (outs ptr_rc:$loc);
+  let InOperandList = (ins i32imm:$a, i32imm:$b);
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
def LOCAL_ESCAPE : StandardPseudoInstruction {
   // This instruction is really just a label. It has to be part of the chain so
   // that it doesn't get dropped from the DAG, but it produces nothing and has
diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td
--- a/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/llvm/include/llvm/Target/TargetCallingConv.td
@@ -41,6 +41,11 @@
 class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
 }
 
+/// CCIfPreallocated - If the current argument has the preallocated parameter
+/// attribute, apply Action A.
+class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
+}
+
 /// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
 /// apply Action A.
 class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -673,7 +673,7 @@
 SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
   // No interprocedural analysis is done at the moment.
-  if (!A.hasByValOrInAllocaAttr()) {
+  if (!A.isPassPointeeByValue()) {
     ++ObjectVisitorArgument;
     return unknown();
   }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2339,7 +2339,7 @@
   // A byval, inalloca, or nonnull argument is never null.
   if (const Argument *A = dyn_cast<Argument>(V))
-    if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr())
+    if (A->isPassPointeeByValue() || A->hasNonNullAttr())
       return true;
 
   // A Load tagged with nonnull metadata is never null.
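(For orientation before the lowering changes below: this is the IR shape the new flags and opcodes exist to lower. A minimal sketch in the style of the tests at the end of this patch; %Foo, @use_it and @caller are illustrative names, and the intrinsic signatures are the llvm.call.preallocated.{setup,arg} ones referenced above.)

%Foo = type { i32, i32 }

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
declare void @use_it(%Foo* preallocated(%Foo))

define void @caller() {
  ; One setup per call site; the i32 operand is the number of preallocated args.
  %t = call token @llvm.call.preallocated.setup(i32 1)
  ; Each arg intrinsic returns a pointer into the caller-prepared stack area.
  %p = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
  %f = bitcast i8* %p to %Foo*
  ; The real call carries a "preallocated" bundle tying it back to the setup.
  call void @use_it(%Foo* preallocated(%Foo) %f) ["preallocated"(token %t)]
  ret void
}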
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -96,10 +96,12 @@
     Flags.setSwiftError();
   if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
     Flags.setByVal();
+  if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
+    Flags.setPreallocated();
   if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
     Flags.setInAlloca();
 
-  if (Flags.isByVal() || Flags.isInAlloca()) {
+  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
 
     auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1873,9 +1873,6 @@
 }
 
 SDValue SelectionDAG::getSrcValue(const Value *V) {
-  assert((!V || V->getType()->isPointerTy()) &&
-         "SrcValue is not a pointer?");
-
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
   ID.AddPointer(V);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5796,6 +5796,45 @@
     updateDAGForMaybeTailCall(MC);
     return;
   }
+  case Intrinsic::call_preallocated_setup:
+  case Intrinsic::call_preallocated_arg: {
+    const Value *PreallocatedCall =
+        Intrinsic == Intrinsic::call_preallocated_setup ? &I : I.getOperand(0);
+    assert(cast<CallBase>(PreallocatedCall)
+                   ->getCalledFunction()
+                   ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+           "expected call_preallocated_setup Value");
+    const CallBase *Call = nullptr;
+    for (auto U : PreallocatedCall->users()) {
+      auto *UseCall = cast<CallBase>(U);
+      const Function *Fn = UseCall->getCalledFunction();
+      if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+        Call = UseCall;
+        break;
+      }
+    }
+    assert(Call && "expected corresponding call to preallocated setup/arg");
+    SDValue SrcValue = DAG.getSrcValue(Call);
+    if (Intrinsic == Intrinsic::call_preallocated_setup) {
+      SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+                                getRoot(), SrcValue);
+      setValue(&I, Res);
+      DAG.setRoot(Res);
+    } else {
+      SDValue Ops[3];
+      Ops[0] = getRoot();
+      Ops[1] = SrcValue;
+      Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)),
+                                     sdl, MVT::i32); // arg index
+      SDValue Res = DAG.getNode(
+          ISD::PREALLOCATED_ARG, sdl,
+          DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other),
+          Ops);
+      setValue(&I, Res);
+      DAG.setRoot(Res.getValue(1));
+    }
+    return;
+  }
   case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare: {
     const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -7116,7 +7155,9 @@
       .setChain(getRoot())
       .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
       .setTailCall(isTailCall)
-      .setConvergent(CB.isConvergent());
+      .setConvergent(CB.isConvergent())
+      .setIsPreallocated(
+          CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
 
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
   if (Result.first.getNode()) {
@@ -7642,9 +7683,9 @@
   // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
   // have to do anything here to lower funclet bundles.
   // CFGuardTarget bundles are lowered in LowerCallTo.
-  assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
-                                        LLVMContext::OB_funclet,
-                                        LLVMContext::OB_cfguardtarget}) &&
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+              LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
          "Cannot lower calls with arbitrary operand bundles!");
 
   SDValue Callee = getValue(I.getCalledOperand());
@@ -8605,7 +8646,9 @@
       .setChain(getRoot())
       .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
       .setDiscardResult(Call->use_empty())
-      .setIsPatchPoint(IsPatchPoint);
+      .setIsPatchPoint(IsPatchPoint)
+      .setIsPreallocated(
+          Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
 }
 
 /// Add a stack map intrinsic call's live variable operands to a stackmap
@@ -9125,6 +9168,8 @@
       Flags.setCFGuardTarget();
     if (Args[i].IsByVal)
       Flags.setByVal();
+    if (Args[i].IsPreallocated)
+      Flags.setPreallocated();
    if (Args[i].IsInAlloca) {
       Flags.setInAlloca();
       // Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9134,7 +9179,7 @@
       // in the various CC lowering callbacks.
       Flags.setByVal();
     }
-    if (Args[i].IsByVal || Args[i].IsInAlloca) {
+    if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
       PointerType *Ty = cast<PointerType>(Args[i].Ty);
       Type *ElementTy = Ty->getElementType();
 
@@ -9633,12 +9678,21 @@
       // in the various CC lowering callbacks.
       Flags.setByVal();
     }
+    if (Arg.hasAttribute(Attribute::Preallocated)) {
+      Flags.setPreallocated();
+      // Set the byval flag for CCAssignFn callbacks that don't know about
+      // preallocated. This way we can know how many bytes we should've
+      // allocated and how many bytes a callee cleanup function will pop. If
+      // we port preallocated to more targets, we'll have to add custom
+      // preallocated handling in the various CC lowering callbacks.
+      Flags.setByVal();
+    }
     if (F.getCallingConv() == CallingConv::X86_INTR) {
       // IA Interrupt passes frame (1st parameter) by value in the stack.
       if (ArgNo == 0)
         Flags.setByVal();
     }
-    if (Flags.isByVal() || Flags.isInAlloca()) {
+    if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
       Type *ElementTy = Arg.getParamByValType();
 
       // For ByVal, size and alignment should be passed from FE.  BE will
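(Callee-side counterpart of the LowerArguments hunk above, under the same illustrative names as the earlier sketch: a preallocated parameter is given the byval flag because, from the callee's point of view, the pointee already sits in the caller-prepared argument area exactly as with byval, so the existing CC callbacks can compute its size and offset unchanged. Sketch only:)

%Foo = type { i32, i32 }

define void @callee(%Foo* preallocated(%Foo) %f) {
  ; %f points directly at the stack memory the caller set up; no copy is made.
  %p0 = getelementptr %Foo, %Foo* %f, i32 0, i32 0
  %v = load i32, i32* %p0
  ret void
}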
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -393,6 +393,10 @@
   case ISD::GC_TRANSITION_END:          return "gc_transition.end";
   case ISD::GET_DYNAMIC_AREA_OFFSET:    return "get.dynamic.area.offset";
   case ISD::FREEZE:                     return "freeze";
+  case ISD::PREALLOCATED_SETUP:
+    return "call_setup";
+  case ISD::PREALLOCATED_ARG:
+    return "call_alloc";
 
   // Bit manipulation
   case ISD::ABS:                        return "abs";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -110,14 +110,18 @@
   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlign(ArgIdx);
   ByValType = nullptr;
-  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+  if (IsByVal)
     ByValType = Call->getParamByValType(ArgIdx);
+  PreallocatedType = nullptr;
+  if (IsPreallocated)
+    PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1443,6 +1443,10 @@
   return getAttributes(Index+FirstArgIndex).getByValType();
 }
 
+Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
+  return getAttributes(Index + FirstArgIndex).getPreallocatedType();
+}
+
 MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -114,11 +114,12 @@
   return hasAttribute(Attribute::InAlloca);
 }
 
-bool Argument::hasByValOrInAllocaAttr() const {
+bool Argument::isPassPointeeByValue() const {
   if (!getType()->isPointerTy())
     return false;
   AttributeList Attrs = getParent()->getAttributes();
   return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
-         Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca);
+         Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
+         Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated);
 }
 
 unsigned Argument::getParamAlignment() const {
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -98,7 +98,7 @@
        AI != AE; ++AI) {
     Type *Ty = AI->getType();
     // 'Dereference' type in case of byval or inalloca parameter attribute.
-    if (AI->hasByValOrInAllocaAttr())
+    if (AI->isPassPointeeByValue())
       Ty = cast<PointerType>(Ty)->getElementType();
 
     // Size should be aligned to pointer size.
     unsigned PtrSize = DL.getPointerSize();
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -429,7 +429,7 @@
   for (auto &Arg : F.args()) {
     if (!isSupportedType(DL, TLI, Arg.getType()))
       return false;
-    if (Arg.hasByValOrInAllocaAttr())
+    if (Arg.isPassPointeeByValue())
       return false;
   }
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -789,8 +789,9 @@
 /// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
 /// values are spilled on the stack.
 def CC_X86_32_Common : CallingConv<[
-  // Handles byval parameters.
+  // Handles byval/preallocated parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
+  CCIfPreallocated<CCPassByVal<4, 4>>,
 
   // The first 3 float or double arguments, if marked 'inreg' and if the call
   // is not a vararg call and if SSE2 is available, are passed in SSE registers.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -57,7 +57,8 @@
 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   return !MF.getFrameInfo().hasVarSizedObjects() &&
-         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
 }
 
 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -67,6 +68,7 @@
 bool
 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
   return hasReservedCallFrame(MF) ||
+         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
          TRI->hasBasePointer(MF);
 }
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5625,6 +5625,39 @@
     CurDAG->RemoveDeadNode(Node);
     return;
   }
+  case ISD::PREALLOCATED_SETUP: {
+    auto MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->PreallocatedIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
+  case ISD::PREALLOCATED_ARG: {
+    auto MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->PreallocatedIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    SDValue ArgIndex = Node->getOperand(2);
+    SDValue Ops[3];
+    Ops[0] = CallIdValue;
+    Ops[1] = ArgIndex;
+    Ops[2] = Chain;
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::PREALLOCATED_ARG, dl,
+        CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
+                          MVT::Other),
+        Ops);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
+    ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
   }
 
   SelectCode(Node);
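(Note on the isel hunk above: the SRCVALUE operand carries the lowered call site itself, a non-pointer Value, which is why the pointer-type assert in SelectionDAG::getSrcValue is dropped earlier in this patch. X86ISelDAGToDAG folds that Value into a dense per-function ID via PreallocatedIdForCallSite, so distinct preallocated call sites get independent stack sizes and offsets. An illustrative sketch, reusing the earlier hypothetical names:)

%Foo = type { i32, i32 }

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
declare void @use_it(%Foo* preallocated(%Foo))

define void @two_sites() {
  ; Two separate call sites: each maps to its own PreallocatedId, so each
  ; gets its own recorded stack size and argument offsets.
  %t1 = call token @llvm.call.preallocated.setup(i32 1)
  %p1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
  %f1 = bitcast i8* %p1 to %Foo*
  call void @use_it(%Foo* preallocated(%Foo) %f1) ["preallocated"(token %t1)]

  %t2 = call token @llvm.call.preallocated.setup(i32 1)
  %p2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
  %f2 = bitcast i8* %p2 to %Foo*
  call void @use_it(%Foo* preallocated(%Foo) %f2) ["preallocated"(token %t2)]
  ret void
}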
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3893,6 +3893,22 @@
     if (ArgLocs.back().getLocMemOffset() != 0)
       report_fatal_error("any parameter with the inalloca attribute must be "
                          "the only memory argument");
+  } else if (CLI.IsPreallocated) {
+    if (!ArgLocs.back().isMemLoc()) {
+      report_fatal_error("cannot use preallocated attribute on a register "
+                         "parameter");
+    }
+    SmallVector<size_t, 4> PreallocatedOffsets;
+    for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+      if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+        PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+      }
+    }
+    auto MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    size_t PreallocatedId = MFI->PreallocatedIdForCallSite(CLI.CB);
+    MFI->SetPreallocatedStackSize(PreallocatedId, NumBytes);
+    MFI->SetPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
+    NumBytesToPush = 0;
   }
 
   if (!IsSibcall && !IsMustTail)
@@ -3920,9 +3936,9 @@
   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
        ++I, ++OutIndex) {
     assert(OutIndex < Outs.size() && "Invalid Out index");
-    // Skip inalloca arguments, they have already been written.
+    // Skip inalloca/preallocated arguments, they have already been written.
     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
-    if (Flags.isInAlloca())
+    if (Flags.isInAlloca() || Flags.isPreallocated())
       continue;
 
     CCValAssign &VA = ArgLocs[I];
@@ -4110,8 +4126,8 @@
       assert(VA.isMemLoc());
       SDValue Arg = OutVals[OutsIndex];
       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
-      // Skip inalloca arguments.  They don't require any work.
-      if (Flags.isInAlloca())
+      // Skip inalloca/preallocated arguments.  They don't require any work.
+      if (Flags.isInAlloca() || Flags.isPreallocated())
         continue;
       // Create frame index.
       int32_t Offset = VA.getLocMemOffset()+FPDiff;
@@ -33072,6 +33088,38 @@
     BB->addLiveIn(BasePtr);
     return BB;
   }
+  case TargetOpcode::PREALLOCATED_SETUP: {
+    assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    MFI->setHasPreallocatedCall(true);
+    // Offsets from the stack pointer may be wrong because we have adjusted it.
+    MFI->setForceFramePointer(true);
+    int64_t PreallocatedId = MI.getOperand(0).getImm();
+    size_t StackAdjustment = MFI->GetPreallocatedStackSize(PreallocatedId);
+    assert(StackAdjustment != 0 && "0 stack adjustment");
+    LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
+                      << StackAdjustment << "\n");
+    BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+        .addReg(X86::ESP)
+        .addImm(StackAdjustment);
+    MI.eraseFromParent();
+    return BB;
+  }
+  case TargetOpcode::PREALLOCATED_ARG: {
+    assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
+    int64_t PreallocatedId = MI.getOperand(1).getImm();
+    int64_t ArgIdx = MI.getOperand(2).getImm();
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    size_t ArgOffset = MFI->GetPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
+    LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
+                      << ", arg offset " << ArgOffset << "\n");
+    // copy stack pointer + offset into reg
+    addRegOffset(
+        BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
+        X86::ESP, false, ArgOffset);
+    MI.eraseFromParent();
+    return BB;
+  }
   }
 }
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -103,6 +103,14 @@
   /// True if this function has WIN_ALLOCA instructions.
   bool HasWinAlloca = false;
 
+  /// True if this function has any preallocated calls.
+  bool HasPreallocatedCall = false;
+
+  size_t PreallocatedNextId = 0;
+  ValueMap<const Value *, size_t> PreallocatedIds;
+  DenseMap<size_t, size_t> PreallocatedStackSizes;
+  DenseMap<size_t, SmallVector<size_t, 4>> PreallocatedArgOffsets;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -184,6 +192,40 @@
   bool hasWinAlloca() const { return HasWinAlloca; }
   void setHasWinAlloca(bool v) { HasWinAlloca = v; }
 
+  bool hasPreallocatedCall() const { return HasPreallocatedCall; }
+  void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
+
+  size_t PreallocatedIdForCallSite(const Value *CS) {
+    auto Id = PreallocatedIds.find(CS);
+    if (Id == PreallocatedIds.end()) {
+      size_t NewId = PreallocatedNextId++;
+      PreallocatedIds.insert({CS, NewId});
+      return NewId;
+    } else {
+      return Id->second;
+    }
+  }
+
+  void SetPreallocatedStackSize(size_t Id, size_t StackSize) {
+    PreallocatedStackSizes[Id] = StackSize;
+  }
+
+  size_t GetPreallocatedStackSize(const size_t Id) {
+    assert(PreallocatedStackSizes.find(Id) != PreallocatedStackSizes.end() &&
+           "stack size not set");
+    return PreallocatedStackSizes[Id];
+  }
+
+  void SetPreallocatedArgOffsets(size_t Id, SmallVector<size_t, 4> AO) {
+    PreallocatedArgOffsets[Id] = AO;
+  }
+
+  const SmallVector<size_t, 4> &GetPreallocatedArgOffsets(const size_t Id) {
+    assert(PreallocatedArgOffsets.find(Id) != PreallocatedArgOffsets.end() &&
+           "arg offsets not set");
+    return PreallocatedArgOffsets[Id];
+  }
 };
 
 } // End llvm namespace
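(Why hasBasePointer() in the next hunk returns true whenever the function contains a preallocated call: PREALLOCATED_SETUP moves ESP ahead of the call sequence, so ESP-relative offsets to locals are no longer constant, and the custom inserter forces a frame pointer for the same reason. An illustrative sketch under the same hypothetical names, not output from an actual llc run:)

%Foo = type { i32, i32 }

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
declare void @use_it(%Foo* preallocated(%Foo))

define void @local_and_setup() {
  ; After the setup's ESP adjustment, %local no longer has a fixed ESP
  ; offset, so it must be addressed via the frame or base pointer.
  %local = alloca i32
  %t = call token @llvm.call.preallocated.setup(i32 1)
  %p = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
  store i32 7, i32* %local
  %f = bitcast i8* %p to %Foo*
  call void @use_it(%Foo* preallocated(%Foo) %f) ["preallocated"(token %t)]
  ret void
}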
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -627,18 +627,22 @@
 }
 
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  if (!EnableBasePointer)
-    return false;
-
-  // When we need stack realignment, we can't address the stack from the frame
-  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
-  // can't address variables from the stack pointer.  MS inline asm can
-  // reference locals while also adjusting the stack pointer.  When we can't
-  // use both the SP and the FP, we need a separate base pointer register.
-  bool CantUseFP = needsStackRealignment(MF);
-  return CantUseFP && CantUseSP(MFI);
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  if (X86FI->hasPreallocatedCall())
+    return true;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (!EnableBasePointer)
+    return false;
+
+  // When we need stack realignment, we can't address the stack from the frame
+  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
+  // can't address variables from the stack pointer.  MS inline asm can
+  // reference locals while also adjusting the stack pointer.  When we can't
+  // use both the SP and the FP, we need a separate base pointer register.
+  bool CantUseFP = needsStackRealignment(MF);
+  return CantUseFP && CantUseSP(MFI);
 }
 
 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -289,7 +289,8 @@
   bool Changed = false;
 
   for (Argument &Arg : Fn.args()) {
-    if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) {
+    if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() &&
+        !Arg.isPassPointeeByValue()) {
       if (Arg.isUsedByMetadata()) {
         Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
         Changed = true;
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -839,7 +839,7 @@
   // Treat byval or inalloca arguments the same, stores to them are dead at the
   // end of the function.
   for (Argument &AI : BB.getParent()->args())
-    if (AI.hasByValOrInAllocaAttr())
+    if (AI.isPassPointeeByValue())
       DeadStackObjects.insert(&AI);
 
   const DataLayout &DL = BB.getModule()->getDataLayout();
@@ -1549,7 +1549,7 @@
   // Treat byval or inalloca arguments the same as Allocas, stores to them are
   // dead at the end of the function.
   for (Argument &AI : F.args())
-    if (AI.hasByValOrInAllocaAttr())
+    if (AI.isPassPointeeByValue())
       State.InvisibleToCallerBeforeRet.insert(&AI);
   return State;
 }
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1242,7 +1242,7 @@
   Function *CalledFunc = CB.getCalledFunction();
   for (Argument &Arg : CalledFunc->args()) {
     unsigned Align = Arg.getType()->isPointerTy() ?
       Arg.getParamAlignment() : 0;
-    if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
+    if (Align && !Arg.isPassPointeeByValue() && !Arg.hasNUses(0)) {
       if (!DTCalculated) {
         DT.recalculate(*CB.getCaller());
         DTCalculated = true;
diff --git a/llvm/test/CodeGen/X86/preallocated-nocall.ll b/llvm/test/CodeGen/X86/preallocated-nocall.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated-nocall.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @no_call() {
+; CHECK-LABEL: _no_call:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  call void @init(%Foo* %b)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/preallocated-x64.ll b/llvm/test/CodeGen/X86/preallocated-x64.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated-x64.ll
@@ -0,0 +1,18 @@
+; RUN: not --crash llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1 | FileCheck %s
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))
+
+define void @g() {
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+; CHECK: cannot use preallocated attribute on a register parameter
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/preallocated.ll b/llvm/test/CodeGen/X86/preallocated.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated.ll
@@ -0,0 +1,174 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @one_preallocated() {
+; CHECK-LABEL: _one_preallocated:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @preallocated_with_store() {
+; CHECK-LABEL: _preallocated_with_store:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  %p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+  %p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
+  store i32 13, i32* %p0
+  store i32 42, i32* %p1
+; CHECK-DAG: movl $13, ([[REGISTER]])
+; CHECK-DAG: movl $42, 4([[REGISTER]])
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @preallocated_with_init() {
+; CHECK-LABEL: _preallocated_with_init:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))
+
+define void @two_preallocated() {
+; CHECK-LABEL: _two_preallocated:
+  %t = call token @llvm.call.preallocated.setup(i32 2)
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b1 = bitcast i8* %a1 to %Foo*
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
+  %b2 = bitcast i8* %a2 to %Foo*
+; CHECK: subl $16, %esp
+; CHECK: calll _foo_p_p
+  call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_p_int(%Foo* preallocated(%Foo), i32)
+
+define void @one_preallocated_one_normal() {
+; CHECK-LABEL: _one_preallocated_one_normal:
+; CHECK: subl $12, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: movl $2, 8(%esp)
+; CHECK: calll _foo_p_int
+  call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))
+
+define void @nested_with_init() {
+; CHECK-LABEL: _nested_with_init:
+  %tmp = alloca %Foo
+
+  %t1 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
+  %b1 = bitcast i8* %a1 to %Foo*
+; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
+
+  %t2 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
+; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
+  %b2 = bitcast i8* %a2 to %Foo*
+
+  call void @init(%Foo* %b2)
+; CHECK: pushl [[REGISTER2]]
+; CHECK: calll _init
+
+  call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+  call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+  ret void
+}
+
+declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))
+
+define void @inreg() {
+; CHECK-LABEL: _inreg:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: movl $9, %eax
+; CHECK: calll _foo_inreg_p
+  call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))
+
+define void @thiscall() {
+; CHECK-LABEL: _thiscall:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: xorl %ecx, %ecx
+; CHECK: calll _foo_thiscall_p
+  call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
+declare x86_stdcallcc void @i(i32)
+
+define void @stdcall() {
+; CHECK-LABEL: _stdcall:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_stdcall_p@8
+  call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+; CHECK-NOT: %esp
+; CHECK: pushl
+; CHECK: calll _i@4
+  call x86_stdcallcc void @i(i32 0)
+  ret void
+}