Index: lib/CodeGen/SelectionDAG/StatepointLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -113,84 +113,137 @@
   llvm_unreachable("infinite loop?");
 }
 
+/// Utility function for reservePreviousStackSlotForValue. Tries to find the
+/// stack slot index to which the value was spilled for previous statepoints.
+/// LookUpDepth specifies the maximum DFS depth this function may search.
+static Optional<int> findPreviousSpillSlot(const Value *Val,
+                                           SelectionDAGBuilder &Builder,
+                                           int LookUpDepth) {
+  // Cannot look any further - give up now
+  if (LookUpDepth <= 0)
+    return Optional<int>();
+
+  // Spill location is known for gc relocates
+  if (isGCRelocate(Val)) {
+    GCRelocateOperands RelocOps(cast<Instruction>(Val));
+
+    FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
+        Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
+
+    auto It = SpillMap.find(RelocOps.getDerivedPtr());
+    if (It == SpillMap.end())
+      return Optional<int>();
+
+    return It->second;
+  }
+
+  // Look through cast instructions. Most of them are nops.
+  if (const CastInst *Cast = dyn_cast<CastInst>(Val)) {
+    return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
+  }
+
+  // Look through phi nodes.
+  // All incoming values should have the same known stack slot; otherwise the
+  // result is unknown.
+  if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
+    Optional<int> MergedResult = None;
+
+    for (auto &IncomingValue : Phi->incoming_values()) {
+      Optional<int> SpillSlot =
+          findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
+      if (!SpillSlot.hasValue())
+        return Optional<int>();
+
+      if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+        return Optional<int>();
+
+      MergedResult = SpillSlot;
+    }
+    return MergedResult;
+  }
+
+  // TODO: We can do better for PHI nodes. In cases like this:
+  //   ptr = phi(relocated_pointer, not_relocated_pointer)
+  //   statepoint(ptr)
+  // we will report that the stack slot for ptr is unknown, and later we might
+  // assign different stack slots for ptr and relocated_pointer. This limits
+  // llvm's ability to remove redundant stores.
+  // Unfortunately this is hard to accomplish in the current infrastructure.
+  // We use this function to eliminate the spill store completely, while in
+  // the example above we would still need to emit the store, just to a
+  // special "preferred" location instead of an arbitrary one.
+
+  // TODO: handle simple updates. If a value is modified and the original
+  // value is no longer live, it would be nice to put the modified value in
+  // the same slot. This allows folding of the memory accesses for some
+  // instruction types (like an increment).
+  //   statepoint (i)
+  //   i1 = i+1
+  //   statepoint (i1)
+  // However we need to be careful for cases like this:
+  //   statepoint(i)
+  //   i1 = i+1
+  //   statepoint(i, i1)
+  // Here we want to reserve a spill slot for 'i', but not for 'i+1'. If we
+  // just handled simple modifications in this function the way we handle
+  // bitcasts, we might end up reserving i's slot for 'i+1', because the order
+  // in which we visit values is unspecified.
+
+  // Don't know any information about this instruction
+  return Optional<int>();
+}
+
 /// Try to find existing copies of the incoming values in stack slots used for
 /// statepoint spilling. If we can find a spill slot for the incoming value,
 /// mark that slot as allocated, and reuse the same slot for this safepoint.
 /// This helps to avoid series of loads and stores that only serve to reshuffle
 /// values on the stack between calls.
-static void reservePreviousStackSlotForValue(SDValue Incoming,
+static void reservePreviousStackSlotForValue(const Value *IncomingValue,
                                              SelectionDAGBuilder &Builder) {
+  SDValue Incoming = Builder.getValue(IncomingValue);
+
   if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
     // We won't need to spill this, so no need to check for previously
     // allocated stack slots
     return;
   }
 
-  SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
-  if (Loc.getNode()) {
+  SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
+  if (OldLocation.getNode())
     // duplicates in input
     return;
-  }
 
-  // Search back for the load from a stack slot pattern to find the original
-  // slot we allocated for this value.  We could extend this to deal with
-  // simple modification patterns, but simple dealing with trivial load/store
-  // sequences helps a lot already.
-  if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) {
-    if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) {
-      const int Index = FI->getIndex();
-      auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
-                           Builder.FuncInfo.StatepointStackSlots.end(), Index);
-      if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) {
-        // not one of the lowering stack slots, can't reuse!
-        // TODO: Actually, we probably could reuse the stack slot if the value
-        // hasn't changed at all, but we'd need to look for intervening writes
-        return;
-      } else {
-        // This is one of our dedicated lowering slots
-        const int Offset =
-            std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
-        if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
-          // stack slot already assigned to someone else, can't use it!
-          // TODO: currently we reserve space for gc arguments after doing
-          // normal allocation for deopt arguments.  We should reserve for
-          // _all_ deopt and gc arguments, then start allocating.  This
-          // will prevent some moves being inserted when vm state changes,
-          // but gc state doesn't between two calls.
-          return;
-        }
-        // Reserve this stack slot
-        Builder.StatepointLowering.reserveStackSlot(Offset);
-      }
+  const int LookUpDepth = 6;
+  Optional<int> Index =
+      findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
+  if (!Index.hasValue())
+    return;
 
-      // Cache this slot so we find it when going through the normal
-      // assignment loop.
-      SDValue Loc =
-          Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
-
-      Builder.StatepointLowering.setLocation(Incoming, Loc);
-    }
+  auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
+                       Builder.FuncInfo.StatepointStackSlots.end(), *Index);
+  assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() &&
+         "value spilled to an unknown stack slot");
+
+  // This is one of our dedicated lowering slots
+  const int Offset =
+      std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+  if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+    // stack slot already assigned to someone else, can't use it!
+    // TODO: currently we reserve space for gc arguments after doing
+    // normal allocation for deopt arguments. We should reserve for
+    // _all_ deopt and gc arguments, then start allocating. This
+    // will prevent some moves being inserted when vm state changes,
+    // but gc state doesn't between two calls.
+    return;
   }
+  // Reserve this stack slot
+  Builder.StatepointLowering.reserveStackSlot(Offset);
 
-  // TODO: handle case where a reloaded value flows through a phi to
-  // another safepoint. e.g.
-  // bb1:
-  //  a' = relocated...
-  // bb2: % pred: bb1, bb3, bb4, etc.
-  //  a_phi = phi(a', ...)
-  // statepoint ... a_phi
-  // NOTE: This will require reasoning about cross basic block values. This is
-  // decidedly non trivial and this might not be the right place to do it. We
-  // don't really have the information we need here...
-
-  // TODO: handle simple updates. If a value is modified and the original
-  // value is no longer live, it would be nice to put the modified value in the
-  // same slot. This allows folding of the memory accesses for some
-  // instructions types (like an increment).
-  // statepoint (i)
-  // i1 = i+1
-  // statepoint (i1)
+  // Cache this slot so we find it when going through the normal
+  // assignment loop.
+  SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType());
+  Builder.StatepointLowering.setLocation(Incoming, Loc);
 }
 
 /// Remove any duplicate (as SDValues) from the derived pointer pairs. This
@@ -469,15 +522,11 @@
   // doesn't change semantics at all. It is important for performance that we
   // reserve slots for both deopt and gc values before lowering either.
   for (const Value *V : StatepointSite.vm_state_args()) {
-    SDValue Incoming = Builder.getValue(V);
-    reservePreviousStackSlotForValue(Incoming, Builder);
+    reservePreviousStackSlotForValue(V, Builder);
   }
   for (unsigned i = 0; i < Bases.size(); ++i) {
-    const Value *Base = Bases[i];
-    reservePreviousStackSlotForValue(Builder.getValue(Base), Builder);
-
-    const Value *Ptr = Ptrs[i];
-    reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder);
+    reservePreviousStackSlotForValue(Bases[i], Builder);
+    reservePreviousStackSlotForValue(Ptrs[i], Builder);
   }
 
   // First, prefix the list with the number of unique values to be
Index: test/CodeGen/X86/statepoint-stack-usage.ll
===================================================================
--- test/CodeGen/X86/statepoint-stack-usage.ll
+++ test/CodeGen/X86/statepoint-stack-usage.ll
@@ -52,9 +52,53 @@
   ret i32 1
 }
 
+; Test that stack slots are reused for invokes
+define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
+; CHECK-LABEL: back_to_back_invokes
+entry:
+  ; The exact stores don't matter, but three stack slots need to be created
+  ; CHECK: movq %rdi, 16(%rsp)
+  ; CHECK: movq %rdx, 8(%rsp)
+  ; CHECK: movq %rsi, (%rsp)
+  ; CHECK: callq
+  %safepoint_token = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+                    to label %normal_return unwind label %exceptional_return
+
+normal_return:
+  %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
+  %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
+  %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
+  ; Should work even through bitcasts
+  %c1.casted = bitcast i32 addrspace(1)* %c1 to i8 addrspace(1)*
+  ; This is the key check. There should NOT be any memory moves here
+  ; CHECK-NOT: movq
+  ; CHECK: callq
+  %safepoint_token2 = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+                     to label %normal_return2 unwind label %exceptional_return2
+
+normal_return2:
+  %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
+  %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
+  %c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token2, i32 12, i32 12)
+  ret i32 1
+
+exceptional_return:
+  %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+          cleanup
+  ret i32 0
+
+exceptional_return2:
+  %landing_pad2 = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+          cleanup
+  ret i32 0
+}
+
 ; Function Attrs: nounwind
 declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32) #3
 
 declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
 
-attributes #1 = { uwtable }
\ No newline at end of file
+declare i32 @"personality_function"()
+
+attributes #1 = { uwtable }
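
For reference, the lookup logic this patch adds amounts to a bounded DFS: look through casts, recurse into phi incoming values with a fixed depth budget (LookUpDepth = 6), and reuse a slot only when every path agrees on a single one. Below is a minimal self-contained sketch of that pattern, readable apart from the SelectionDAG machinery; std::optional stands in for llvm::Optional, and Node, findSlot, and the Leaf/Cast/Phi kinds are illustrative stand-ins for LLVM's Value hierarchy and SpillMap, not LLVM API.

    // Self-contained sketch of the bounded-DFS lookup in findPreviousSpillSlot.
    // Names here are illustrative, not LLVM API.
    #include <optional>
    #include <vector>

    struct Node {
      enum NodeKind { Leaf, Cast, Phi };
      NodeKind Kind;
      std::vector<const Node *> Operands; // Cast: 1 operand; Phi: incoming values
      std::optional<int> KnownSlot;       // Leaf: previously recorded spill slot
    };

    // Returns the spill slot all paths agree on, or nullopt if unknown.
    static std::optional<int> findSlot(const Node *N, int LookUpDepth) {
      if (LookUpDepth <= 0)
        return std::nullopt; // depth budget exhausted - give up

      switch (N->Kind) {
      case Node::Leaf:
        return N->KnownSlot; // plays the role of the SpillMap lookup
      case Node::Cast:
        // Casts are mostly no-ops for spilling purposes; look through them.
        return findSlot(N->Operands[0], LookUpDepth - 1);
      case Node::Phi: {
        // All incoming values must resolve to the same known slot.
        std::optional<int> Merged;
        for (const Node *Incoming : N->Operands) {
          std::optional<int> Slot = findSlot(Incoming, LookUpDepth - 1);
          if (!Slot || (Merged && *Merged != *Slot))
            return std::nullopt;
          Merged = Slot;
        }
        return Merged;
      }
      }
      return std::nullopt;
    }

The fixed depth budget keeps the walk cheap on pathological phi graphs, and the conservative merge (all incoming values must agree) is exactly the limitation the first TODO in the patch proposes to relax.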