Index: lib/CodeGen/SelectionDAG/StatepointLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -111,6 +111,51 @@
   return SpillSlot;
 }
 
+/// There may be an existing slot which can be used. In particular, for
+/// readonly uses (deopt) we can reuse space associated with argument passing
+/// since we know we won't modify them within the callee (this function).
+/// Returns a TargetFrameIndex for the reused slot (which is also marked as a
+/// statepoint spill slot), or a null SDValue if no such slot is available.
+static SDValue getFixedSlot(SDValue Incoming, bool ReadOnly,
+                            SelectionDAGBuilder &Builder) {
+  // We could potentially consider letting the GC rewrite these slots, but
+  // doing so might break assumptions elsewhere in the backend. This needs to
+  // be evaluated carefully.
+  if (!ReadOnly)
+    return SDValue();
+
+  // TODO: We could also consider arguments pushed on the stack for this call.
+  // In principle, this could be fine, but we need to review the ABI carefully
+  // to be sure if the immutability of arguments is an ABI guarantee or an LLVM
+  // implementation accident.
+
+  // Look to see if we were passed a deopt value in an argument that spilled to
+  // the stack by our caller. If so, we've got the perfect slot to record
+  // since it already contains the value we need.
+  auto *LNode = dyn_cast<LoadSDNode>(Incoming);
+  if (!LNode)
+    return SDValue();
+  if (LNode->getExtensionType() != ISD::NON_EXTLOAD ||
+      LNode->isIndexed())
+    return SDValue();
+  assert(LNode->getOffset().isUndef() && "Shouldn't be indexed!");
+  auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+  if (!FINode)
+    return SDValue();
+  const int FrameIdx = FINode->getIndex();
+  auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+  if (!MFI->isImmutableObjectIndex(FrameIdx))
+    return SDValue();
+
+  // This is a bit of a hack. We need to record the fact that this stack slot
+  // is a statepoint spill slot so that our target frame index gets interpreted
+  // as an Indirect stackmap entry, not a direct one.
+  MFI->markAsStatepointSpillSlotObjectIndex(FrameIdx);
+
+  // We use TargetFrameIndex so that isel will not select it into LEA
+  return Builder.DAG.getTargetFrameIndex(FrameIdx, Incoming.getValueType());
+}
+
 /// Utility function for reservePreviousStackSlotForValue. Tries to find
 /// stack slot index to which we have spilled value for previous statepoints.
 /// LookUpDepth specifies maximum DFS depth this function is allowed to look.
@@ -194,11 +239,13 @@
 /// This helps to avoid series of loads and stores that only serve to reshuffle
 /// values on the stack between calls.
 static void reservePreviousStackSlotForValue(const Value *IncomingValue,
+                                             bool ReadOnly,
                                              SelectionDAGBuilder &Builder) {
 
   SDValue Incoming = Builder.getValue(IncomingValue);
 
-  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
+  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming) ||
+      getFixedSlot(Incoming, ReadOnly, Builder)) {
     // We won't need to spill this, so no need to check for previously
     // allocated stack slots
     return;
@@ -371,6 +418,7 @@
 /// either a deopt value or a gc value, the handling is the same. We special
 /// case constants and allocas, then fall back to spilling if required.
 static void lowerIncomingStatepointValue(SDValue Incoming,
+                                         bool ReadOnly,
                                          SmallVectorImpl<SDValue> &Ops,
                                          SelectionDAGBuilder &Builder) {
   SDValue Chain = Builder.getRoot();
@@ -389,6 +437,8 @@
     // relocate the address of the alloca itself?)
     Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
                                                   Incoming.getValueType()));
+  } else if (SDValue Slot = getFixedSlot(Incoming, ReadOnly, Builder)) {
+    Ops.push_back(Slot);
   } else {
     // Otherwise, locate a spill slot and explicitly spill it so it
     // can be found by the runtime later. We currently do not support
@@ -451,11 +501,11 @@
   // doesn't change semantics at all. It is important for performance that we
   // reserve slots for both deopt and gc values before lowering either.
   for (const Value *V : SI.DeoptState) {
-    reservePreviousStackSlotForValue(V, Builder);
+    reservePreviousStackSlotForValue(V, /*ReadOnly=*/true, Builder);
   }
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
-    reservePreviousStackSlotForValue(SI.Bases[i], Builder);
-    reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+    reservePreviousStackSlotForValue(SI.Bases[i], /*ReadOnly=*/false, Builder);
+    reservePreviousStackSlotForValue(SI.Ptrs[i], /*ReadOnly=*/false, Builder);
   }
 
   // First, prefix the list with the number of unique values to be
@@ -468,7 +518,7 @@
   // what type of values are contained within.
   for (const Value *V : SI.DeoptState) {
     SDValue Incoming = Builder.getValue(V);
-    lowerIncomingStatepointValue(Incoming, Ops, Builder);
+    lowerIncomingStatepointValue(Incoming, /*ReadOnly=*/true, Ops, Builder);
   }
 
   // Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -478,10 +528,12 @@
   // (base[0], ptr[0], base[1], ptr[1], ...)
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
     const Value *Base = SI.Bases[i];
-    lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Base), /*ReadOnly=*/false,
+                                 Ops, Builder);
 
     const Value *Ptr = SI.Ptrs[i];
-    lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Ptr), /*ReadOnly=*/false,
+                                 Ops, Builder);
   }
 
   // If there are any explicit spill slots passed to the statepoint, record
Index: test/CodeGen/X86/statepoint-fixed-slots.ll
===================================================================
--- test/CodeGen/X86/statepoint-fixed-slots.ll
+++ test/CodeGen/X86/statepoint-fixed-slots.ll
@@ -0,0 +1,25 @@
+; RUN: llc -O3 < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @foo()
+
+declare void @use(...)
+
+define void @test(i32 %v, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10) gc "statepoint-example" {
+; CHECK-LABEL: test
+; Looking for the fact that the last 4 arguments are not loaded, and respilled in different locations
+; CHECK: subq $24, %rsp
+; CHECK: movl %edi, 20(%rsp)
+; CHECK: movl %esi, 16(%rsp)
+; CHECK: movl %edx, 12(%rsp)
+; CHECK: movl %ecx, 8(%rsp)
+; CHECK: movl %r8d, 4(%rsp)
+; CHECK: movl %r9d, (%rsp)
+; CHECK: callq foo
+
+  %statepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 10, i32 %v, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10)
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)