diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -82,6 +82,28 @@ return N; } +/// Return starting index of GC operand list. +// FIXME: need a better place for this. Put it in StackMaps? +static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) { + assert(MI->getOpcode() == TargetOpcode::STATEPOINT && + "STATEPOINT node expected"); + unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx(); + unsigned NumDeopts = MI->getOperand(OperIdx).getImm(); + // At this point stack references has not been lowered yet, so they + // take single operand. + ++OperIdx; + while (NumDeopts--) { + MachineOperand &MO = MI->getOperand(OperIdx); + if (MO.isImm() && MO.getImm() == StackMaps::ConstantOp) { + ++OperIdx; + assert(MI->getOperand(OperIdx).isImm() && + "Unexpected statepoint operand"); + } + ++OperIdx; + } + return OperIdx; +} + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter:: @@ -200,6 +222,8 @@ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && II.isVariadic() && II.variadicOpsAreDefs(); unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs(); + if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT) + NumVRegs = NumResults; for (unsigned i = 0; i < NumVRegs; ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -821,6 +845,8 @@ NumDefs = NumResults; } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } else if (Opc == TargetOpcode::STATEPOINT) { + NumDefs = NumResults; } unsigned NumImpUses = 0; @@ -970,6 +996,20 @@ if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef()) MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); + // STATEPOINT is too 'dynamic' to have meaningful machine description. + // We have to manually tie operands. + if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) { + assert(!HasPhysRegOuts && "STATEPOINT mishandled"); + MachineInstr *MI = MIB; + unsigned Def = 0; + unsigned Use = getStatepointGCArgStartIdx(MI) + 1; + while (Def < NumDefs) { + if (MI->getOperand(Use).isReg()) + MI->tieOperands(Def++, Use); + Use += 2; + } + } + // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) TLI->AdjustInstrPostInstrSelection(*MIB, Node); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -125,8 +125,7 @@ PhysReg = Reg; } else if (Def->isMachineOpcode()) { const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); - if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) + if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg)) PhysReg = Reg; } diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -103,6 +103,10 @@ return AllocatedStackSlots.test(Offset); } + /// For each statepoint keep mapping from original derived pointer to + /// the statepoint node result defining its new value. + DenseMap DerivedPtrMap; + private: /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into it's location (currently only stack slots) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -67,6 +67,10 @@ "use-registers-for-deopt-values", cl::Hidden, cl::init(false), cl::desc("Allow using registers for non pointer deopt args")); +cl::opt UseRegistersForGCPointers( + "use-registers-for-gc-values", cl::Hidden, cl::init(false), + cl::desc("Allow using registers for GC pointer meta args")); + static void pushStackMapConstant(SmallVectorImpl& Ops, SelectionDAGBuilder &Builder, uint64_t Value) { SDLoc L = Builder.getCurSDLoc(); @@ -221,7 +225,6 @@ return None; } - /// Return true if-and-only-if the given SDValue can be lowered as either a /// constant argument or a stack reference. The key point is that the value /// doesn't need to be spilled or tracked as a vreg use. @@ -242,7 +245,6 @@ Incoming.isUndef()); } - /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. @@ -388,7 +390,7 @@ StoreMMO); MMO = getMachineMemOperand(MF, *cast(Loc)); - + Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -485,7 +487,9 @@ /// will be set to the last value spilled (if any were). static void lowerStatepointMetaArgs(SmallVectorImpl &Ops, - SmallVectorImpl &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI, + SmallVectorImpl &MemRefs, + DenseMap &Ptr2ResNo, + SelectionDAGBuilder::StatepointLoweringInfo &SI, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will be: // deopt argument length, deopt arguments.., gc arguments... @@ -531,6 +535,38 @@ const bool LiveInDeopt = SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; + // Decide which deriver pointers will go on VRegs + const size_t MaxTiedRegs = 15; // Max number of tied regs MI can have. + unsigned MaxVRegPtrs = + UseRegistersForGCPointers ? std::min(MaxTiedRegs, SI.Ptrs.size()) : 0; + // Use old spill scheme for cross-block relocates. + if (SI.StatepointInstr) { + const BasicBlock *BB = SI.StatepointInstr->getParent(); + bool NonLocalReloc = + llvm::any_of(SI.GCRelocates, [BB](const GCRelocateInst *R) { + return R->getParent() != BB; + }); + if (NonLocalReloc) + MaxVRegPtrs = 0; + } + + LLVM_DEBUG(dbgs() << "Desiding how to lower GC Pointers:\n"); + DenseSet LowerAsVReg; + for (const Value *P : SI.Ptrs) { + if (LowerAsVReg.size() == MaxVRegPtrs) + break; + SDValue PtrSD = Builder.getValue(P); + if (willLowerDirectly(PtrSD) || P->getType()->isVectorTy() || + Builder.DL->isNonIntegralPointerType(P->getType())) { + LLVM_DEBUG(dbgs() << "spill "; PtrSD.dump(&Builder.DAG)); + continue; + } + LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG)); + LowerAsVReg.insert(PtrSD); + } + LLVM_DEBUG(dbgs() << LowerAsVReg.size() + << " derived pointers will go in vregs\n"); + auto isGCValue = [&](const Value *V) { auto *Ty = V->getType(); if (!Ty->isPtrOrPtrVectorTy()) @@ -542,7 +578,9 @@ }; auto requireSpillSlot = [&](const Value *V) { - return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V); + if (isGCValue(V)) + return LowerAsVReg.count(Builder.getValue(V)) == 0; + return !(LiveInDeopt || UseRegistersForDeoptValues); }; // Before we actually start lowering (and allocating spill slots for values), @@ -554,10 +592,19 @@ if (requireSpillSlot(V)) reservePreviousStackSlotForValue(V, Builder); } + + unsigned CurNumVRegs = 0; for (unsigned i = 0; i < SI.Bases.size(); ++i) { + SDValue SDV = Builder.getValue(SI.Ptrs[i]); + if (LowerAsVReg.count(SDV)) { + Ptr2ResNo[SDV] = (int)CurNumVRegs++; + continue; + } + Ptr2ResNo[SDV] = -1; reservePreviousStackSlotForValue(SI.Bases[i], Builder); reservePreviousStackSlotForValue(SI.Ptrs[i], Builder); } + assert(CurNumVRegs == LowerAsVReg.size() && "Lowered pointers mismatch"); // First, prefix the list with the number of unique values to be // lowered. Note that this is the number of *Values* not the @@ -567,6 +614,7 @@ // The vm state arguments are lowered in an opaque manner. We do not know // what type of values are contained within. + LLVM_DEBUG(dbgs() << "Lowering deopt state\n"); for (const Value *V : SI.DeoptState) { SDValue Incoming; // If this is a function argument at a static frame index, generate it as @@ -578,6 +626,8 @@ } if (!Incoming.getNode()) Incoming = Builder.getValue(V); + LLVM_DEBUG(dbgs() << "Value " << *V + << " requireSpillSlot = " << requireSpillSlot(V) << "\n"); lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs, Builder); } @@ -588,14 +638,16 @@ // it's (lowered) derived pointer. i.e // (base[0], ptr[0], base[1], ptr[1], ...) for (unsigned i = 0; i < SI.Bases.size(); ++i) { - const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), - /*RequireSpillSlot*/ true, Ops, MemRefs, - Builder); + SDValue Derived = Builder.getValue(SI.Ptrs[i]); + assert(Ptr2ResNo.find(Derived) != Ptr2ResNo.end() && + "Broken DerivedPtr map"); - const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), - /*RequireSpillSlot*/ true, Ops, MemRefs, + bool RequireSpillSlot = (LowerAsVReg.count(Derived) == 0); + assert((RequireSpillSlot || Ptr2ResNo[Derived] >= 0) && "mismatch"); + const Value *Base = SI.Bases[i]; + lowerIncomingStatepointValue(Builder.getValue(Base), RequireSpillSlot, Ops, + MemRefs, Builder); + lowerIncomingStatepointValue(Derived, RequireSpillSlot, Ops, MemRefs, Builder); } @@ -630,7 +682,9 @@ SDValue SDV = Builder.getValue(V); SDValue Loc = Builder.StatepointLowering.getLocation(SDV); - if (Loc.getNode()) { + if (Ptr2ResNo[SDV] >= 0) { + SpillMap[V] = None; + } else if (Loc.getNode()) { SpillMap[V] = cast(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas @@ -665,6 +719,7 @@ assert(SI.Bases.size() == SI.Ptrs.size() && SI.Ptrs.size() <= SI.GCRelocates.size()); + LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG for (auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) @@ -674,7 +729,12 @@ // Lower statepoint vmstate and gcstate arguments SmallVector LoweredMetaArgs; SmallVector MemRefs; - lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this); + DenseMap Ptr2ResNo; + lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, Ptr2ResNo, SI, *this); + + unsigned NumVRegs = + llvm::count_if(Ptr2ResNo, [](auto V) { return V.second >= 0; }); + LLVM_DEBUG(dbgs() << "NumVRegs = " << NumVRegs << "\n"); // Now that we've emitted the spills, we need to update the root so that the // call sequence is ordered correctly. @@ -788,14 +848,35 @@ // Compute return values. Provide a glue output since we consume one as // input. This allows someone else to chain off us as needed. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector NodeTys; + for (auto &Ptr : SI.Ptrs) { + SDValue SD = getValue(Ptr); + if (Ptr2ResNo[SD] >= 0) { + NodeTys.push_back(SD.getValueType()); + } + } + LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n"); + assert(NodeTys.size() == NumVRegs && "Inconsistent GC Ptr lowering"); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Glue); + unsigned NumResults = NodeTys.size(); MachineSDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); SDNode *SinkNode = StatepointMCNode; + // Fill mapping from derived pointer to statepoint result denoting its + // relocated value. + auto &DPtrMap = StatepointLowering.DerivedPtrMap; + for (const auto *Relocate : SI.GCRelocates) { + Value *Derived = Relocate->getDerivedPtr(); + SDValue SD = getValue(Derived); + if (Ptr2ResNo[SD] >= 0) + DPtrMap[Derived] = SDValue(StatepointMCNode, Ptr2ResNo[SD]); + } + // Build the GC_TRANSITION_END node if necessary. // // See the comment above regarding GC_TRANSITION_START for the layout of @@ -804,7 +885,7 @@ SmallVector TEOps; // Add chain - TEOps.push_back(SDValue(StatepointMCNode, 0)); + TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2)); // Add GC transition arguments for (const Value *V : SI.GCTransitionArgs) { @@ -814,7 +895,7 @@ } // Add glue - TEOps.push_back(SDValue(StatepointMCNode, 1)); + TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1)); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -825,7 +906,12 @@ } // Replace original call - DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root + // Call: ch,glue = CALL ... + // Statepoint: [gc relocates],ch,glue = STATEPOINT ... + unsigned NumSinkValues = SinkNode->getNumValues(); + SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2), + SDValue(SinkNode, NumSinkValues - 1)}; + DAG.ReplaceAllUsesWith(CallNode, StatepointValues); // Remove original call node DAG.DeleteNode(CallNode); @@ -927,7 +1013,7 @@ setValue(&I, ReturnValue); return; } - + // Result value will be used in a different basic block so we need to export // it now. Default exporting mechanism will not work here because statepoint // call has a different type than the actual call. It means that by default @@ -1033,6 +1119,13 @@ return; } + auto &DPtrMap = StatepointLowering.DerivedPtrMap; + auto It = DPtrMap.find(DerivedPtr); + if (It != DPtrMap.end()) { + setValue(&Relocate, It->second); + return; + } + auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; auto SlotIt = SpillMap.find(DerivedPtr); assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1041,9 +1041,15 @@ // Inherit previous memory operands. MIB.cloneMemRefs(*MI); - for (auto &MO : MI->operands()) { + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) { + unsigned TiedTo = i; + if (MO.isReg() && MO.isTied()) + TiedTo = MI->findTiedOperandIdx(i); MIB.add(MO); + if (TiedTo < i) + MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1); continue; }