diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -118,10 +118,6 @@
     SlotMapTy::const_iterator end() const { return SlotMap.end(); }
   };
 
-  /// Maps gc.statepoint instructions to their corresponding StatepointSpillMap
-  /// instances.
-  DenseMap<const Instruction *, StatepointSpillMap> StatepointSpillMaps;
-
   /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
   /// the entry block. This allows the allocas to be efficiently referenced
   /// anywhere in the function.
@@ -143,12 +139,6 @@
   DenseSet<unsigned> RegsWithFixups;
 
-  /// StatepointStackSlots - A list of temporary stack slots (frame indices)
-  /// used to spill values at a statepoint. We store them here to enable
-  /// reuse of the same stack slots across different statepoints in different
-  /// basic blocks.
-  SmallVector<unsigned, 50> StatepointStackSlots;
-
   /// MBB - The current block.
   MachineBasicBlock *MBB;
 
diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -99,7 +99,8 @@
     /// VirtReg is live across a call, and PhysReg isn't call-preserved.
     IK_RegMask
   };
-
+  /// Check whether any statepoint in the function references VirtReg.
+  bool checkInterferenceForStatepoints(LiveInterval &VirtReg);
   /// Check for interference before assigning VirtReg to PhysReg.
   /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
   /// When there is more than one kind of interference, the InterferenceKind
diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -330,6 +330,8 @@
     /// Idx2MBBMap - Sorted list of pairs of index of first instruction
     /// and MBB id.
     SmallVector<IdxMBBPair, 8> idx2MBBMap;
+    /// Slot indexes of all STATEPOINT instructions, in numbering order.
+    SmallVector<SlotIndex, 8> StatepointSlotIndexes;
 
     IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
       IndexListEntry *entry =
@@ -637,6 +639,10 @@
       renumberIndexes(newItr);
       llvm::sort(idx2MBBMap, less_first());
     }
+
+    const SmallVector<SlotIndex, 8> &getStatepointSlotIndexes() const {
+      return StatepointSlotIndexes;
+    }
   };
 
   // Specialize IntervalMapInfo for half-open slot index intervals.
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -202,6 +202,7 @@
     }
   };
   std::set<CopyHint> CopyHints;
+  bool InStatepoint = false;
 
   for (MachineRegisterInfo::reg_instr_iterator
        I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
@@ -217,6 +218,8 @@
     numInstr++;
     if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugInstr())
       continue;
+    if (mi->getOpcode() == TargetOpcode::STATEPOINT)
+      InStatepoint = true;
     if (!visited.insert(mi).second)
       continue;
 
@@ -286,7 +289,7 @@
   // is not live at any reg mask. If the interval is live at a reg mask
   // spilling may be required.
   if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) &&
-      !li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
+      !li.isLiveAtIndexes(LIS.getRegMaskSlots()) && !InStatepoint) {
    li.markNotSpillable();
     return -1.0;
   }
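The CalcSpillWeights change above keeps intervals that feed a STATEPOINT spillable: a virtual register whose only use is a statepoint operand can look zero-length, and marking it not-spillable would leave the allocator no legal choice once the RegAllocGreedy changes below forbid keeping such values in registers. A minimal sketch of the resulting predicate (the helper name and standalone packaging are hypothetical, not part of the patch; UsedByStatepoint corresponds to the InStatepoint flag computed above):

    // True when the interval must keep a finite spill weight: it has real
    // extent, it is live across a call (regmask), or a statepoint references
    // it and will force it onto the stack.
    static bool mustStaySpillable(LiveInterval &LI, LiveIntervals &LIS,
                                  bool UsedByStatepoint) {
      return !LI.isZeroLength(LIS.getSlotIndexes()) ||
             LI.isLiveAtIndexes(LIS.getRegMaskSlots()) || UsedByStatepoint;
    }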
diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp
--- a/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -194,12 +194,20 @@
       RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
       SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
       for (unsigned i = 0, e = RegMaskSlots.size();
-           i != e && RegMaskSlots[i] < Limit; ++i)
+           i != e && RegMaskSlots[i] < Limit; ++i) {
         if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
           // Register mask i clobbers PhysReg before the LIU interference.
           BI->First = RegMaskSlots[i];
           break;
         }
+        // Treat a statepoint like a regmask clobber: values live across it
+        // may not be kept in PhysReg.
+        MachineInstr *MI = LIS->getInstructionFromIndex(RegMaskSlots[i]);
+        if (MI && MI->getOpcode() == TargetOpcode::STATEPOINT) {
+          BI->First = RegMaskSlots[i];
+          break;
+        }
+      }
 
     PrevPos = Stop;
     if (BI->First.isValid())
@@ -251,11 +259,18 @@
   // Also check for register mask interference.
   SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
   for (unsigned i = RegMaskSlots.size();
-       i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+       i && RegMaskSlots[i - 1].getDeadSlot() > Limit; --i) {
     if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
       // Register mask i-1 clobbers PhysReg after the LIU interference.
       // Model the regmask clobber as a dead def.
       BI->Last = RegMaskSlots[i-1].getDeadSlot();
       break;
     }
+    // Likewise, model a trailing statepoint as a dead def of PhysReg.
+    MachineInstr *MI = LIS->getInstructionFromIndex(RegMaskSlots[i - 1]);
+    if (MI && MI->getOpcode() == TargetOpcode::STATEPOINT) {
+      BI->Last = RegMaskSlots[i - 1].getDeadSlot();
+      break;
+    }
+  }
 }
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/LiveIntervalUnion.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
@@ -63,6 +64,7 @@
 
   // Make sure no stale queries get reused.
   invalidateVirtRegs();
+
   return false;
 }
@@ -220,3 +222,23 @@
   }
   return false;
 }
+
+bool LiveRegMatrix::checkInterferenceForStatepoints(LiveInterval &VirtReg) {
+  SlotIndexes *Indexes = LIS->getSlotIndexes();
+  // Walk every STATEPOINT in the function and look for a use of VirtReg in
+  // its variable (deopt/gc) operand section.
+  for (const SlotIndex &StatepointSlot : Indexes->getStatepointSlotIndexes()) {
+    MachineInstr *MI = Indexes->getInstructionFromIndex(StatepointSlot);
+    assert(MI && MI->getOpcode() == TargetOpcode::STATEPOINT &&
+           "recorded index is not a statepoint");
+    StatepointOpers Opers(MI);
+    for (unsigned i = Opers.getVarIdx(); i != MI->getNumOperands(); ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      if (MO.getReg() == VirtReg.reg)
+        return true;
+    }
+  }
+  return false;
+}
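A STATEPOINT machine instruction's operand list begins with a fixed header (statepoint ID, number of patch bytes, call target and call arguments, plus a few constant markers), followed by a variable-length section holding the deopt state and gc pointers. StatepointOpers::getVarIdx() from StackMaps.h returns the index where that variable section starts, which is why the query above begins its scan there. The same scan as a standalone predicate (a sketch; the free-function packaging and name are hypothetical):

    // Does this STATEPOINT reference Reg in its deopt/gc operand section?
    static bool statepointUsesReg(const MachineInstr &MI, unsigned Reg) {
      assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "not a statepoint");
      StatepointOpers Opers(&MI);
      for (unsigned I = Opers.getVarIdx(), E = MI.getNumOperands(); I != E; ++I) {
        const MachineOperand &MO = MI.getOperand(I);
        if (MO.isReg() && MO.getReg() == Reg)
          return true;
      }
      return false;
    }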
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -3035,25 +3035,29 @@
                                        SmallVectorImpl<unsigned> &NewVRegs,
                                        SmallVirtRegSet &FixedRegisters,
                                        unsigned Depth) {
+  bool HasStatepointInterference =
+      Matrix->checkInterferenceForStatepoints(VirtReg);
   unsigned CostPerUseLimit = ~0u;
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
-  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
-    // If VirtReg got an assignment, the eviction info is no longer relevant.
-    LastEvicted.clearEvicteeInfo(VirtReg.reg);
-    // When NewVRegs is not empty, we may have made decisions such as evicting
-    // a virtual register, go with the earlier decisions and use the physical
-    // register.
-    if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
-        NewVRegs.empty()) {
-      unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
-                                              CostPerUseLimit, NewVRegs);
-      if (CSRReg || !NewVRegs.empty())
-        // Return now if we decide to use a CSR or create new vregs due to
-        // pre-splitting.
-        return CSRReg;
-    } else
-      return PhysReg;
+  if (!HasStatepointInterference) {
+    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
+      // If VirtReg got an assignment, the eviction info is no longer relevant.
+      LastEvicted.clearEvicteeInfo(VirtReg.reg);
+      // When NewVRegs is not empty, we may have made decisions such as evicting
+      // a virtual register, go with the earlier decisions and use the physical
+      // register.
+      if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
+          NewVRegs.empty()) {
+        unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+                                                CostPerUseLimit, NewVRegs);
+        if (CSRReg || !NewVRegs.empty())
+          // Return now if we decide to use a CSR or create new vregs due to
+          // pre-splitting.
+          return CSRReg;
+      } else
+        return PhysReg;
+    }
   }
 
   LiveRangeStage Stage = getStage(VirtReg);
@@ -3063,7 +3067,7 @@
   // Try to evict a less worthy live range, but only for ranges from the primary
   // queue. The RS_Split ranges already failed to do this, and they should not
   // get a second chance until they have been split.
-  if (Stage != RS_Split)
+  if (!HasStatepointInterference && Stage != RS_Split)
     if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
                                     FixedRegisters)) {
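Condensed, the two RegAllocGreedy hunks change the selection order as sketched below (tryAssign, tryEvict, and trySplitOrSpill are hypothetical stand-ins for the corresponding RAGreedy members, and the surrounding allocator state is assumed). The point is that a virtual register referenced by any statepoint can never win a physical register outright: it always falls through to the split/spill stages, so it ends up in a stack slot the stackmap can describe.

    unsigned selectSketch(LiveRegMatrix &Matrix, LiveInterval &VirtReg,
                          bool IsSplitStage) {
      bool HasStatepointInterference =
          Matrix.checkInterferenceForStatepoints(VirtReg);
      if (!HasStatepointInterference) {
        if (unsigned PhysReg = tryAssign(VirtReg))   // direct assignment
          return PhysReg;
        if (!IsSplitStage)
          if (unsigned PhysReg = tryEvict(VirtReg))  // eviction
            return PhysReg;
      }
      // Statepoint-referenced vregs always end up here.
      return trySplitOrSpill(VirtReg);
    }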
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -340,8 +340,6 @@
   ByValArgFrameIndexMap.clear();
   RegFixups.clear();
   RegsWithFixups.clear();
-  StatepointStackSlots.clear();
-  StatepointSpillMaps.clear();
   PreferredExtendType.clear();
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -84,41 +84,11 @@
     PendingGCRelocateCalls.erase(I);
   }
 
-  // TODO: Should add consistency tracking to ensure we encounter
-  // expected gc_result calls too.
-
-  /// Get a stack slot we can use to store a value of type ValueType. This
-  /// will hopefully be a recycled slot from another statepoint.
-  SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
-
-  void reserveStackSlot(int Offset) {
-    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
-           "out of bounds");
-    assert(!AllocatedStackSlots.test(Offset) && "already reserved!");
-    assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
-    AllocatedStackSlots.set(Offset);
-  }
-
-  bool isStackSlotAllocated(int Offset) {
-    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
-           "out of bounds");
-    return AllocatedStackSlots.test(Offset);
-  }
-
 private:
   /// Maps pre-relocation value (gc pointer directly incoming into statepoint)
   /// into its location (currently only stack slots)
   DenseMap<SDValue, SDValue> Locations;
 
-  /// A boolean indicator for each slot listed in the FunctionInfo as to
-  /// whether it has been used in the current statepoint. Since we try to
-  /// preserve stack slots across safepoints, there can be gaps in which
-  /// slots have been allocated.
-  SmallBitVector AllocatedStackSlots;
-
-  /// Points just beyond the last slot known to have been allocated
-  unsigned NextSlotToAllocate = 0;
-
   /// Keep track of pending gcrelocate calls for consistency check
   SmallVector<const GCRelocateInst *, 10> PendingGCRelocateCalls;
 };
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -55,11 +55,7 @@
 
 #define DEBUG_TYPE "statepoint-lowering"
 
-STATISTIC(NumSlotsAllocatedForStatepoints,
-          "Number of stack slots allocated for statepoints");
 STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
-STATISTIC(StatepointMaxSlotsRequired,
-          "Maximum number of stack slots required for a single statepoint");
 
 static void pushStackMapConstant(SmallVectorImpl<SDValue> &Ops,
                                  SelectionDAGBuilder &Builder, uint64_t Value) {
@@ -74,200 +70,14 @@
   assert(PendingGCRelocateCalls.empty() &&
          "Trying to visit statepoint before finished processing previous one");
   Locations.clear();
-  NextSlotToAllocate = 0;
-  // Need to resize this on each safepoint - we need the two to stay in sync
-  // and the clear patterns of a SelectionDAGBuilder have no relation to
-  // FunctionLoweringInfo. Also need to ensure used bits get cleared.
-  AllocatedStackSlots.clear();
-  AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
 }
 
 void StatepointLoweringState::clear() {
   Locations.clear();
-  AllocatedStackSlots.clear();
   assert(PendingGCRelocateCalls.empty() &&
          "cleared before statepoint sequence completed");
 }
 
-SDValue
-StatepointLoweringState::allocateStackSlot(EVT ValueType,
-                                           SelectionDAGBuilder &Builder) {
-  NumSlotsAllocatedForStatepoints++;
-  MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
-
-  unsigned SpillSize = ValueType.getStoreSize();
-  assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
-
-  // First look for a previously created stack slot which is not in
-  // use (accounting for the fact arbitrary slots may already be
-  // reserved), or to create a new stack slot and use it.
-
-  const size_t NumSlots = AllocatedStackSlots.size();
-  assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
-
-  assert(AllocatedStackSlots.size() ==
-         Builder.FuncInfo.StatepointStackSlots.size() &&
-         "Broken invariant");
-
-  for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
-    if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
-      const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
-      if (MFI.getObjectSize(FI) == SpillSize) {
-        AllocatedStackSlots.set(NextSlotToAllocate);
-        // TODO: Is ValueType the right thing to use here?
-        return Builder.DAG.getFrameIndex(FI, ValueType);
-      }
-    }
-  }
-
-  // Couldn't find a free slot, so create a new one:
-
-  SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
-  const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
-  MFI.markAsStatepointSpillSlotObjectIndex(FI);
-
-  Builder.FuncInfo.StatepointStackSlots.push_back(FI);
-  AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true);
-  assert(AllocatedStackSlots.size() ==
-         Builder.FuncInfo.StatepointStackSlots.size() &&
-         "Broken invariant");
-
-  StatepointMaxSlotsRequired.updateMax(
-      Builder.FuncInfo.StatepointStackSlots.size());
-
-  return SpillSlot;
-}
-/// Utility function for reservePreviousStackSlotForValue. Tries to find
-/// stack slot index to which we have spilled value for previous statepoints.
-/// LookUpDepth specifies maximum DFS depth this function is allowed to look.
-static Optional<int> findPreviousSpillSlot(const Value *Val,
-                                           SelectionDAGBuilder &Builder,
-                                           int LookUpDepth) {
-  // Can not look any further - give up now
-  if (LookUpDepth <= 0)
-    return None;
-
-  // Spill location is known for gc relocates
-  if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
-    const auto &SpillMap =
-        Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()];
-
-    auto It = SpillMap.find(Relocate->getDerivedPtr());
-    if (It == SpillMap.end())
-      return None;
-
-    return It->second;
-  }
-
-  // Look through bitcast instructions.
-  if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val))
-    return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
-
-  // Look through phi nodes
-  // All incoming values should have same known stack slot, otherwise result
-  // is unknown.
-  if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
-    Optional<int> MergedResult = None;
-
-    for (auto &IncomingValue : Phi->incoming_values()) {
-      Optional<int> SpillSlot =
-          findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
-      if (!SpillSlot.hasValue())
-        return None;
-
-      if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
-        return None;
-
-      MergedResult = SpillSlot;
-    }
-    return MergedResult;
-  }
-
-  // TODO: We can do better for PHI nodes. In cases like this:
-  //   ptr = phi(relocated_pointer, not_relocated_pointer)
-  //   statepoint(ptr)
-  // We will return that stack slot for ptr is unknown. And later we might
-  // assign different stack slots for ptr and relocated_pointer. This limits
-  // llvm's ability to remove redundant stores.
-  // Unfortunately it's hard to accomplish in current infrastructure.
-  // We use this function to eliminate spill store completely, while
-  // in example we still need to emit store, but instead of any location
-  // we need to use special "preferred" location.
-
-  // TODO: handle simple updates. If a value is modified and the original
-  // value is no longer live, it would be nice to put the modified value in the
-  // same slot. This allows folding of the memory accesses for some
-  // instructions types (like an increment).
-  //   statepoint (i)
-  //   i1 = i+1
-  //   statepoint (i1)
-  // However we need to be careful for cases like this:
-  //   statepoint(i)
-  //   i1 = i+1
-  //   statepoint(i, i1)
-  // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just
-  // put handling of simple modifications in this function like it's done
-  // for bitcasts we might end up reserving i's slot for 'i+1' because order in
-  // which we visit values is unspecified.
-
-  // Don't know any information about this instruction
-  return None;
-}
-/// Try to find existing copies of the incoming values in stack slots used for
-/// statepoint spilling. If we can find a spill slot for the incoming value,
-/// mark that slot as allocated, and reuse the same slot for this safepoint.
-/// This helps to avoid series of loads and stores that only serve to reshuffle
-/// values on the stack between calls.
-static void reservePreviousStackSlotForValue(const Value *IncomingValue,
-                                             SelectionDAGBuilder &Builder) {
-  SDValue Incoming = Builder.getValue(IncomingValue);
-
-  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
-    // We won't need to spill this, so no need to check for previously
-    // allocated stack slots
-    return;
-  }
-
-  SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
-  if (OldLocation.getNode())
-    // Duplicates in input
-    return;
-
-  const int LookUpDepth = 6;
-  Optional<int> Index =
-      findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
-  if (!Index.hasValue())
-    return;
-
-  const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
-
-  auto SlotIt = find(StatepointSlots, *Index);
-  assert(SlotIt != StatepointSlots.end() &&
-         "Value spilled to the unknown stack slot");
-
-  // This is one of our dedicated lowering slots
-  const int Offset = std::distance(StatepointSlots.begin(), SlotIt);
-  if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
-    // stack slot already assigned to someone else, can't use it!
-    // TODO: currently we reserve space for gc arguments after doing
-    // normal allocation for deopt arguments. We should reserve for
-    // _all_ deopt and gc arguments, then start allocating. This
-    // will prevent some moves being inserted when vm state changes,
-    // but gc state doesn't between two calls.
-    return;
-  }
-  // Reserve this stack slot
-  Builder.StatepointLowering.reserveStackSlot(Offset);
-
-  // Cache this slot so we find it when going through the normal
-  // assignment loop.
-  SDValue Loc =
-      Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy());
-  Builder.StatepointLowering.setLocation(Incoming, Loc);
-}
-
 /// Remove any duplicate (as SDValues) from the derived pointer pairs. This
 /// is not required for correctness. Its purpose is to reduce the size of
 /// StackMap section. It has no effect on the number of spill slots required
@@ -276,8 +86,7 @@
 removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
                       SmallVectorImpl<const Value *> &Ptrs,
                       SmallVectorImpl<const GCRelocateInst *> &Relocs,
-                      SelectionDAGBuilder &Builder,
-                      FunctionLoweringInfo::StatepointSpillMap &SSM) {
+                      SelectionDAGBuilder &Builder) {
   DenseMap<SDValue, const Value *> Seen;
 
   SmallVector<const Value *, 2> NewBases, NewPtrs;
@@ -292,9 +101,6 @@
       NewPtrs.push_back(Ptrs[i]);
       NewRelocs.push_back(Relocs[i]);
       Seen[SD] = Ptrs[i];
-    } else {
-      // Duplicate pointer found, note in SSM and move on:
-      SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
     }
   }
 
   assert(Bases.size() >= NewBases.size());
@@ -358,63 +164,13 @@
                                    MFI.getObjectAlignment(FI.getIndex()));
 }
 
-/// Spill a value incoming to the statepoint. It might be either part of the
-/// vmstate or the gcstate. In both cases unconditionally spill it on the
-/// stack unless it is a null constant. Return a pair whose first element is
-/// the frame index containing the saved value and whose second element is
-/// the outgoing chain from the emitted store.
-static std::tuple<SDValue, SDValue, MachineMemOperand *>
-spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
-                             SelectionDAGBuilder &Builder) {
-  SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
-  MachineMemOperand *MMO = nullptr;
-
-  // Emit new store if we didn't do it for this ptr before
-  if (!Loc.getNode()) {
-    Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
-                                                       Builder);
-    int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
-    // We use TargetFrameIndex so that isel will not select it into LEA
-    Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
-
-    // Right now we always allocate spill slots that are of the same
-    // size as the value we're about to spill (the size of spillee can
-    // vary since we spill vectors of pointers too). At some point we
-    // can consider allowing spills of smaller values to larger slots
-    // (i.e. change the '==' in the assert below to a '>=').
-    MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
-    assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
-           "Bad spill: stack slot does not match!");
-
-    // Note: Using the alignment of the spill slot (rather than the abi or
-    // preferred alignment) is required for correctness when dealing with spill
-    // slots with preferred alignments larger than frame alignment.
-    auto &MF = Builder.DAG.getMachineFunction();
-    auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
-    auto *StoreMMO =
-        MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                MFI.getObjectSize(Index),
-                                MFI.getObjectAlignment(Index));
-    Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
-                                 StoreMMO);
-
-    MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
-
-    Builder.StatepointLowering.setLocation(Incoming, Loc);
-  }
-
-  assert(Loc.getNode());
-  return std::make_tuple(Loc, Chain, MMO);
-}
-
 /// Lower a single value incoming to a statepoint node. This value can be
 /// either a deopt value or a gc value, the handling is the same. We special
 /// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
-                                         SmallVectorImpl<SDValue> &Ops,
-                                         SmallVectorImpl<MachineMemOperand *> &MemRefs,
-                                         SelectionDAGBuilder &Builder) {
+static void
+lowerIncomingStatepointValue(SDValue Incoming, SmallVectorImpl<SDValue> &Ops,
+                             SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                             SelectionDAGBuilder &Builder) {
   // Note: We know all of these spills are independent, but don't bother to
   // exploit that chain wise. DAGCombine will happily do so as needed, so
   // doing it here would be a small compile time win at most.
@@ -441,26 +197,8 @@
     auto *MMO = getMachineMemOperand(MF, *FI);
     MemRefs.push_back(MMO);
-  } else if (LiveInOnly) {
-    // If this value is live in (not live-on-return, or live-through), we can
-    // treat it the same way patchpoint treats its "live in" values. We'll
-    // end up folding some of these into stack references, but they'll be
-    // handled by the register allocator. Note that we do not have the notion
-    // of a late use so these values might be placed in registers which are
-    // clobbered by the call. This is fine for live-in.
-    Ops.push_back(Incoming);
   } else {
-    // Otherwise, locate a spill slot and explicitly spill it so it
-    // can be found by the runtime later. We currently do not support
-    // tracking values through callee saved registers to their eventual
-    // spill location. This would be a useful optimization, but would
-    // need to be optional since it requires a lot of complexity on the
-    // runtime side which not all would support.
-    auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
-    Ops.push_back(std::get<0>(Res));
-    if (auto *MMO = std::get<2>(Res))
-      MemRefs.push_back(MMO);
-    Chain = std::get<1>(Res);
+    Ops.push_back(Incoming);
   }
 
   Builder.DAG.setRoot(Chain);
@@ -475,7 +213,8 @@
 /// will be set to the last value spilled (if any were).
 static void
 lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
-                        SmallVectorImpl<MachineMemOperand *> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI,
+                        SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                        SelectionDAGBuilder::StatepointLoweringInfo &SI,
                         SelectionDAGBuilder &Builder) {
   // Lower the deopt and gc arguments for this statepoint. Layout will be:
   // deopt argument length, deopt arguments.., gc arguments...
@@ -521,24 +260,6 @@
   const bool LiveInDeopt =
       SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
 
-  auto isGCValue = [&](const Value *V) {
-    return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
-  };
-
-  // Before we actually start lowering (and allocating spill slots for values),
-  // reserve any stack slots which we judge to be profitable to reuse for a
-  // particular value. This is purely an optimization over the code below and
-  // doesn't change semantics at all. It is important for performance that we
-  // reserve slots for both deopt and gc values before lowering either.
-  for (const Value *V : SI.DeoptState) {
-    if (!LiveInDeopt || isGCValue(V))
-      reservePreviousStackSlotForValue(V, Builder);
-  }
-  for (unsigned i = 0; i < SI.Bases.size(); ++i) {
-    reservePreviousStackSlotForValue(SI.Bases[i], Builder);
-    reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
-  }
-
   // First, prefix the list with the number of unique values to be
   // lowered. Note that this is the number of *Values* not the
   // number of SDValues required to lower them.
@@ -558,8 +279,9 @@
     }
     if (!Incoming.getNode())
       Incoming = Builder.getValue(V);
-    const bool LiveInValue = LiveInDeopt && !isGCValue(V);
-    lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
+    const bool LiveInValue = LiveInDeopt;
+    (void)LiveInValue;
+    lowerIncomingStatepointValue(Incoming, Ops, MemRefs, Builder);
   }
 
@@ -569,12 +291,10 @@
   // Finally, go ahead and lower all the gc arguments. There's no prefixed
   // length for this one. The lowered gc arguments are in this order:
   // (base[0], ptr[0], base[1], ptr[1], ...)
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
     const Value *Base = SI.Bases[i];
-    lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
-                                 Ops, MemRefs, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Base), Ops, MemRefs, Builder);
 
     const Value *Ptr = SI.Ptrs[i];
-    lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
-                                 Ops, MemRefs, Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, MemRefs, Builder);
   }
 
   // If there are any explicit spill slots passed to the statepoint, record
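With the spill path gone, lowering a single incoming deopt or gc value collapses to three cases. A condensed sketch of what lowerIncomingStatepointValue now does (a restatement of the hunks above, not new behavior; pushStackMapConstant and getFrameIndexTy are the helpers already present in this file):

    static void lowerIncomingValueSketch(SDValue Incoming,
                                         SmallVectorImpl<SDValue> &Ops,
                                         SelectionDAGBuilder &Builder) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
        // Constants are encoded directly into the stackmap.
        pushStackMapConstant(Ops, Builder, C->getSExtValue());
      } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
        // Allocas already live on the stack; pass the frame index through.
        Ops.push_back(Builder.DAG.getTargetFrameIndex(
            FI->getIndex(), Builder.getFrameIndexTy()));
      } else {
        // Everything else stays a live-in operand of the STATEPOINT node; the
        // register allocator changes above guarantee it ends up in a stack
        // slot rather than a register clobbered by the call.
        Ops.push_back(Incoming);
      }
    }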
@@ -601,33 +321,17 @@
   // This can not be embedded in lowering loops as we need to record *all*
   // values, while previous loops account only values with unique SDValues.
   const Instruction *StatepointInstr = SI.StatepointInstr;
-  auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
 
   for (const GCRelocateInst *Relocate : SI.GCRelocates) {
     const Value *V = Relocate->getDerivedPtr();
-    SDValue SDV = Builder.getValue(V);
-    SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
-
-    if (Loc.getNode()) {
-      SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
-    } else {
-      // Record value as visited, but not spilled. This is case for allocas
-      // and constants. For this values we can avoid emitting spill load while
-      // visiting corresponding gc_relocate.
-      // Actually we do not need to record them in this map at all.
-      // We do this only to check that we are not relocating any unvisited
-      // value.
-      SpillMap.SlotMap[V] = None;
-
-      // Default llvm mechanisms for exporting values which are used in
-      // different basic blocks does not work for gc relocates.
-      // Note that it would be incorrect to teach llvm that all relocates are
-      // uses of the corresponding values so that it would automatically
-      // export them. Relocates of the spilled values does not use original
-      // value.
-      if (Relocate->getParent() != StatepointInstr->getParent())
-        Builder.ExportFromCurrentBlock(V);
-    }
+    // The default llvm mechanisms for exporting values which are used in
+    // different basic blocks do not work for gc relocates. Note that it
+    // would be incorrect to teach llvm that all relocates are uses of the
+    // corresponding values so that it would automatically export them:
+    // relocates of the spilled values do not use the original value.
+    if (Relocate->getParent() != StatepointInstr->getParent())
+      Builder.ExportFromCurrentBlock(V);
   }
 }
@@ -653,8 +357,7 @@
   // input. Also has the effect of removing duplicates in the original
   // llvm::Value input list as well. This is a useful optimization for
   // reducing the size of the StackMap section. It has no other impact.
-  removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
-                        FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
+  removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this);
   assert(SI.Bases.size() == SI.Ptrs.size() &&
          SI.Ptrs.size() == SI.GCRelocates.size());
@@ -1003,45 +706,7 @@
   const Value *DerivedPtr = Relocate.getDerivedPtr();
   SDValue SD = getValue(DerivedPtr);
-
-  auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
-  auto SlotIt = SpillMap.find(DerivedPtr);
-  assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
-  Optional<int> DerivedPtrLocation = SlotIt->second;
-
-  // We didn't need to spill these special cases (constants and allocas).
-  // See the handling in spillIncomingValueForStatepoint for detail.
-  if (!DerivedPtrLocation) {
-    setValue(&Relocate, SD);
-    return;
-  }
-
-  unsigned Index = *DerivedPtrLocation;
-  SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
-
-  // Note: We know all of these reloads are independent, but don't bother to
-  // exploit that chain wise. DAGCombine will happily do so as needed, so
-  // doing it here would be a small compile time win at most.
-  SDValue Chain = getRoot();
-
-  auto &MF = DAG.getMachineFunction();
-  auto &MFI = MF.getFrameInfo();
-  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
-  auto *LoadMMO =
-      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                              MFI.getObjectSize(Index),
-                              MFI.getObjectAlignment(Index));
-
-  auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
-                                                         Relocate.getType());
-
-  SDValue SpillLoad =
-      DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO);
-
-  DAG.setRoot(SpillLoad.getValue(1));
-
-  assert(SpillLoad.getNode());
-  setValue(&Relocate, SpillLoad);
+  setValue(&Relocate, SD);
 }
 
 void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
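Relocate lowering is now a plain value forward: a gc.relocate reuses the SDValue of its derived pointer, and the register allocator, not the SelectionDAG, decides where that value lives across the call. Condensed, the surviving logic of the last hunk looks like this (a restatement of the change above, with the function's surrounding context assumed):

    void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
      // No spill-slot reload: forward the derived pointer's value directly.
      SDValue SD = getValue(Relocate.getDerivedPtr());
      setValue(&Relocate, SD);
    }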
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -81,6 +81,10 @@
       // Save this base index in the maps.
       mi2iMap.insert(std::make_pair(
           &MI, SlotIndex(&indexList.back(), SlotIndex::Slot_Block)));
+      if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
+        StatepointSlotIndexes.push_back(
+            SlotIndex(&indexList.back(), SlotIndex::Slot_Block));
+      }
     }
 
     // We insert one blank instruction between basic blocks.
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -775,7 +775,8 @@
   // value. The inserted COPY is not a kill, and we don't need to recompute
   // the source live range. The spiller also won't try to hoist this copy.
   if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
-      MI->readsVirtualRegister(Edit->getReg())) {
+      MI->readsVirtualRegister(Edit->getReg()) &&
+      (MI->getOpcode() != TargetOpcode::STATEPOINT)) {
     forceRecompute(0, *ParentVNI);
     defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
     return Idx;
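The SlotIndexes change above is what feeds getStatepointSlotIndexes(): every STATEPOINT has its slot index recorded while the function is numbered, so later passes can enumerate statepoints without rescanning every instruction. A minimal consumer sketch (the function name is hypothetical; the pattern is the one LiveRegMatrix::checkInterferenceForStatepoints uses earlier in this patch):

    static void forEachStatepoint(const SlotIndexes &Indexes,
                                  function_ref<void(MachineInstr &)> Fn) {
      for (SlotIndex Idx : Indexes.getStatepointSlotIndexes()) {
        MachineInstr *MI = Indexes.getInstructionFromIndex(Idx);
        assert(MI && MI->getOpcode() == TargetOpcode::STATEPOINT);
        Fn(*MI);
      }
    }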
diff --git a/llvm/test/CodeGen/X86/statepoint-no-early-ldr.ll b/llvm/test/CodeGen/X86/statepoint-no-early-ldr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/statepoint-no-early-ldr.ll
@@ -0,0 +1,68 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define i8 addrspace(1)* @no_early_ldr(i8 addrspace(1)* %obj0, i8 addrspace(1)* %obj1, i8 addrspace(1)* %obj2, i8 addrspace(1)* %obj3, i8 addrspace(1)* %obj4, i8 addrspace(1)* %obj5, i8 addrspace(1)* %obj6, i8 addrspace(1)* %obj7, i8 addrspace(1)* %obj8, i8 addrspace(1)* %obj9, i8 addrspace(1)* %obj10, i8 addrspace(1)* %obj11, i8 addrspace(1)* %obj12, i8 %b0, i8 %b1) gc "statepoint-example" {
+; CHECK-LABEL: no_early_ldr:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-NEXT:    subq $56, %rsp
+
+prologue:
+  br label %entry
+entry:
+  %safepoint_token0 = call token (i64, i32, void (i8 addrspace(1)*,i8 addrspace(1)*,i8 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8p1i8p1i8f(i64 0, i32 4, void (i8 addrspace(1)*,i8 addrspace(1)*,i8 addrspace(1)*)* null, i32 3, i32 0, i8 addrspace(1)* %obj0, i8 addrspace(1)* %obj1, i8 addrspace(1)* %obj2, i32 0, i32 0, i8 addrspace(1)* %obj0, i8 addrspace(1)* %obj1, i8 addrspace(1)* %obj2, i8 addrspace(1)* %obj3, i8 addrspace(1)* %obj4, i8 addrspace(1)* %obj5, i8 addrspace(1)* %obj6, i8 addrspace(1)* %obj7, i8 addrspace(1)* %obj8, i8 addrspace(1)* %obj9, i8 addrspace(1)* %obj10, i8 addrspace(1)* %obj11, i8 addrspace(1)* %obj12)
+  %cmp0 = icmp ne i8 %b0, 0
+  br i1 %cmp0, label %return_null, label %cont.1
+cont.1:
+  %obj0.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 10, i32 10)
+  %obj1.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 11, i32 11)
+  %obj2.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 12, i32 12)
+  %obj3.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 13, i32 13)
+  %obj4.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 14, i32 14)
+  %obj5.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 15, i32 15)
+  %obj6.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 16, i32 16)
+  %obj7.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 17, i32 17)
+  %obj8.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 18, i32 18)
+  %obj9.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 19, i32 19)
+  %obj10.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 20, i32 20)
+  %obj11.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 21, i32 21)
+  %obj12.relocated0 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token0, i32 22, i32 22)
+
+  %cmp1 = icmp ne i8 %b1, 0
+  br i1 %cmp1, label %return_null, label %cont.2
+cont.2:
+  %ret_0 = call i8 (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*) @call_extra_0(i8 addrspace(1)* %obj1.relocated0, i8 addrspace(1)* %obj3.relocated0, i8 addrspace(1)* %obj5.relocated0)
+  %cmp2 = icmp ne i8 %ret_0, 0
+  br i1 %cmp2, label %return_null, label %cont.3
+cont.3:
+  %ret_1 = call i8 (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*) @call_extra_0(i8 addrspace(1)* %obj7.relocated0, i8 addrspace(1)* %obj9.relocated0, i8 addrspace(1)* %obj0.relocated0)
+  %cmp3 = icmp ne i8 %ret_1, 0
+  br i1 %cmp3, label %return_null, label %cont.4
+cont.4:
+  %ret_2 = call i8 (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*) @call_extra_0(i8 addrspace(1)* %obj2.relocated0, i8 addrspace(1)* %obj4.relocated0, i8 addrspace(1)* %obj6.relocated0)
+  %cmp4 = icmp ne i8 %ret_2, 0
+  br i1 %cmp4, label %return_null, label %cont.5
+cont.5:
+  %ret_3 = call i8 (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*) @call_extra_0(i8 addrspace(1)* %obj10.relocated0, i8 addrspace(1)* %obj11.relocated0, i8 addrspace(1)* %obj12.relocated0)
+  ret i8 addrspace(1)* %obj0.relocated0
+
+return_null:
+  ret i8 addrspace(1)* null
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8p1i8p1i8f(i64, i32, void (i8 addrspace(1)*,i8 addrspace(1)*,i8 addrspace(1)*)*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) +declare i8 @call_extra_0(i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)