diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -99,6 +99,15 @@
   using StatepointSpillMapTy = DenseMap<const Value *, Optional<int>>;
   DenseMap<const Instruction *, StatepointSpillMapTy> StatepointSpillMaps;
 
+  /// For each statepoint keep mapping from original derived pointer to
+  /// the index of StatepointSDNode result defining its new value.
+  using DerivedPtrMapTy = DenseMap<const Value *, unsigned>;
+  DenseMap<const Instruction *, DerivedPtrMapTy> DerivedPtrMap;
+
+  /// For each statepoint keep virtual registers its result values have
+  /// been exported to.
+  DenseMap<const Instruction *, SmallVector<unsigned, 2>> StatepointRegs;
+
   /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
   /// the entry block.  This allows the allocas to be efficiently referenced
   /// anywhere in the function.
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -81,6 +81,37 @@
   return N;
 }
 
+/// Collect used physical registers up the glue chain.
+static void collectPhysRegsFromGlueChain(const SDNode *Node,
+                                         const TargetInstrInfo *TII,
+                                         SmallVectorImpl<Register> &UsedRegs) {
+  if (Node->getValueType(Node->getNumValues() - 1) != MVT::Glue)
+    return;
+  for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+    if (F->getOpcode() == ISD::CopyFromReg) {
+      UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+      continue;
+    } else if (F->getOpcode() == ISD::CopyToReg) {
+      // Skip CopyToReg nodes that are internal to the glue chain.
+      continue;
+    }
+    // Collect declared implicit uses.
+    if (F->isMachineOpcode()) {
+      const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+      UsedRegs.append(MCID.getImplicitUses(),
+                      MCID.getImplicitUses() + MCID.getNumImplicitUses());
+    }
+    // In addition to declared implicit uses, we must also check for
+    // direct RegisterSDNode operands.
+    for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+      if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+        Register Reg = R->getReg();
+        if (Reg.isPhysical())
+          UsedRegs.push_back(Reg);
+      }
+  }
+}
+
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
 void InstrEmitter::
@@ -940,30 +971,7 @@
     }
   }
 
-  // Scan the glue chain for any used physregs.
-  if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
-    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
-      if (F->getOpcode() == ISD::CopyFromReg) {
-        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
-        continue;
-      } else if (F->getOpcode() == ISD::CopyToReg) {
-        // Skip CopyToReg nodes that are internal to the glue chain.
-        continue;
-      }
-      // Collect declared implicit uses.
-      const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
-      UsedRegs.append(MCID.getImplicitUses(),
-                      MCID.getImplicitUses() + MCID.getNumImplicitUses());
-      // In addition to declared implicit uses, we must also check for
-      // direct RegisterSDNode operands.
-      for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
-        if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
-          Register Reg = R->getReg();
-          if (Reg.isPhysical())
-            UsedRegs.push_back(Reg);
-        }
-    }
-  }
+  collectPhysRegsFromGlueChain(Node, TII, UsedRegs);
 
   // Finally mark unused registers as dead.
   if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
@@ -1041,6 +1049,44 @@
     break;
   }
 
+  case ISD::GC_STATEPOINT: {
+    StatepointSDNode *SN = cast<StatepointSDNode>(Node);
+    unsigned GCArgStart = SN->getGCArgStart();
+    unsigned NumValues = CountResults(Node);
+    MachineInstrBuilder MIB =
+        BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::STATEPOINT));
+
+    for (unsigned i = 0; i < NumValues; ++i) {
+      SDValue Def(SN, i);
+      SDValue Use = SN->getOperand(GCArgStart + 1 + i * 2);
+      unsigned UseReg = getVR(Use, VRBaseMap);
+      unsigned DefReg = MRI->cloneVirtualRegister(UseReg);
+      MIB = MIB.addDef(DefReg);
+      VRBaseMap[Def] = DefReg;
+    }
+
+    unsigned NumOperands = Node->getNumOperands();
+    for (unsigned i = 0; i < NumOperands; ++i) {
+      const SDValue &O = Node->getOperand(i);
+      if (O.getValueType() == MVT::Other || O.getValueType() == MVT::Glue)
+        continue;
+      AddOperand(MIB, O, 0, nullptr, VRBaseMap, false, false, false);
+    }
+
+    unsigned Use = NumValues + GCArgStart + 1;
+    for (unsigned Def = 0; Def < NumValues; ++Def, Use += 2) {
+      MIB->tieOperands(Def, Use);
+    }
+
+    MBB->insert(InsertPos, MIB);
+
+    SmallVector<Register, 8> UsedRegs;
+    collectPhysRegsFromGlueChain(Node, TII, UsedRegs);
+    if (!UsedRegs.empty())
+      MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+    break;
+  }
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR: {
     unsigned NumOps = Node->getNumOperands();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2813,6 +2813,7 @@
   case ISD::ANNOTATION_LABEL:
   case ISD::LIFETIME_START:
   case ISD::LIFETIME_END:
+  case ISD::GC_STATEPOINT:
     NodeToMatch->setNodeId(-1); // Mark selected.
     return;
   case ISD::AssertSext:
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -66,6 +66,10 @@
     "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
     cl::desc("Allow using registers for non pointer deopt args"));
 
+cl::opt<bool> UseRegistersForGCPointers(
+    "use-registers-for-gcptrs", cl::Hidden, cl::init(false),
+    cl::desc("Allow using registers for GC pointer meta args"));
+
 static void pushStackMapConstant(SmallVectorImpl<SDValue> &Ops,
                                  SelectionDAGBuilder &Builder, uint64_t Value) {
   SDLoc L = Builder.getCurSDLoc();
@@ -220,6 +224,11 @@
   return None;
 }
 
+// Return true if V is a GC pointer which need not be relocated.
+static bool isNonRelocatablePtr(SDValue V) {
+  return (isa<ConstantSDNode>(V) || isa<FrameIndexSDNode>(V));
+}
+
 /// Try to find existing copies of the incoming values in stack slots used for
 /// statepoint spilling.  If we can find a spill slot for the incoming value,
 /// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -229,11 +238,8 @@
                                                 SelectionDAGBuilder &Builder) {
   SDValue Incoming = Builder.getValue(IncomingValue);
 
-  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
-    // We won't need to spill this, so no need to check for previously
-    // allocated stack slots
+  if (isNonRelocatablePtr(Incoming))
     return;
-  }
 
   SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
   if (OldLocation.getNode())
@@ -273,6 +279,31 @@
   Builder.StatepointLowering.setLocation(Incoming, Loc);
 }
 
+/// Sort Ptrs vector so that pointers which need no relocation (constants and
+/// allocas) are placed at the end and those which need relocation
+/// contiguously occupy beginning of the vector.
+/// Synchronously update Bases and Relocs vectors.
+static unsigned sortGCPtrs(SmallVectorImpl<const Value *> &Bases,
+                           SmallVectorImpl<const Value *> &Ptrs,
+                           SmallVectorImpl<const GCRelocateInst *> &Relocs,
+                           SelectionDAGBuilder &Builder) {
+  unsigned curPos = 0;
+  for (unsigned i = 0, e = Ptrs.size(); i < e; ++i) {
+    SDValue SDV = Builder.getValue(Ptrs[i]);
+    if (isNonRelocatablePtr(SDV) || SDV.getOpcode() == ISD::UNDEF ||
+        SDV.getValueType().getSizeInBits() > 64) {
+      continue;
+    }
+    if (curPos < i) {
+      std::swap(Bases[curPos], Bases[i]);
+      std::swap(Ptrs[curPos], Ptrs[i]);
+      std::swap(Relocs[curPos], Relocs[i]);
+    }
+    ++curPos;
+  }
+  return curPos;
+}
+
 /// Extract call from statepoint, lower it and return pointer to the
 /// call node. Also update NodeMap so that getValue(statepoint) will
 /// reference lowered call result
@@ -366,7 +397,7 @@
                            StoreMMO);
 
   MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
-  
+
   Builder.StatepointLowering.setLocation(Incoming, Loc);
 }
 
@@ -443,7 +474,9 @@
 /// will be set to the last value spilled (if any were).
 static void
 lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
-                        SmallVectorImpl<MachineMemOperand *> &MemRefs,
-                        SelectionDAGBuilder::StatepointLoweringInfo &SI,
+                        SmallVectorImpl<MachineMemOperand *> &MemRefs,
+                        unsigned NumVRegGCArgs, unsigned &GCArgStart,
+                        SelectionDAGBuilder::StatepointLoweringInfo &SI,
                         SelectionDAGBuilder &Builder) {
   // Lower the deopt and gc arguments for this statepoint.  Layout will be:
   // deopt argument length, deopt arguments.., gc arguments...
@@ -500,7 +533,11 @@
   };
 
   auto requireSpillSlot = [&](const Value *V) {
-    return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
+    if (isGCValue(V)) {
+      auto *Ty = V->getType();
+      return Ty->isVectorTy() || !UseRegistersForGCPointers;
+    }
+    return !(LiveInDeopt || UseRegistersForDeoptValues);
   };
 
   // Before we actually start lowering (and allocating spill slots for values),
@@ -512,7 +549,7 @@
     if (requireSpillSlot(V))
       reservePreviousStackSlotForValue(V, Builder);
   }
-  for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+  for (unsigned i = NumVRegGCArgs; i < SI.Bases.size(); ++i) {
     reservePreviousStackSlotForValue(SI.Bases[i], Builder);
     reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
   }
@@ -540,21 +577,31 @@
                                  Builder);
   }
 
+  GCArgStart = Ops.size();
   // Finally, go ahead and lower all the gc arguments.  There's no prefixed
   // length for this one.  After lowering, we'll have the base and pointer
   // arrays interwoven with each (lowered) base pointer immediately followed by
   // it's (lowered) derived pointer.  i.e
   // (base[0], ptr[0], base[1], ptr[1], ...)
+  // Lower first `NumVRegGCArgs` base AND derived pointers through VRegs.
+  // In future we might use more sophisticated strategy for choosing which
+  // pointers to pass via virtual registers, but for now this simple approach
+  // looks good enough. Take into account these facts:
+  // - NumVRegGCArgs is limited by the max number of tied registers in MI;
+  // - We relocate (and so need tied defs for) only derived pointers;
+  // - Quite often base and derived pointer are the same.
+  auto &SL = Builder.StatepointLowering;
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+    bool RequireSpillSlot = (i >= NumVRegGCArgs);
     const Value *Base = SI.Bases[i];
-    lowerIncomingStatepointValue(Builder.getValue(Base),
-                                 /*RequireSpillSlot*/ true, Ops, MemRefs,
-                                 Builder);
+    lowerIncomingStatepointValue(Builder.getValue(Base), RequireSpillSlot, Ops,
+                                 MemRefs, Builder);
 
     const Value *Ptr = SI.Ptrs[i];
-    lowerIncomingStatepointValue(Builder.getValue(Ptr),
-                                 /*RequireSpillSlot*/ true, Ops, MemRefs,
-                                 Builder);
+    SDValue SDV = Builder.getValue(Ptr);
+    lowerIncomingStatepointValue(SDV, RequireSpillSlot, Ops, MemRefs, Builder);
+    if (!RequireSpillSlot && !SL.getLocation(SDV))
+      SL.setLocation(SDV, Builder.DAG.getConstant(i, SDLoc(), MVT::i64));
   }
 
   // If there are any explicit spill slots passed to the statepoint, record
@@ -582,6 +629,7 @@
   // values, while previous loops account only values with unique SDValues.
   const Instruction *StatepointInstr = SI.StatepointInstr;
   auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
+  auto &DPtrMap = Builder.FuncInfo.DerivedPtrMap[StatepointInstr];
 
   for (const GCRelocateInst *Relocate : SI.GCRelocates) {
     const Value *V = Relocate->getDerivedPtr();
@@ -589,7 +637,12 @@
     SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
 
     if (Loc.getNode()) {
-      SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+      if (auto FI = dyn_cast<FrameIndexSDNode>(Loc))
+        SpillMap[V] = FI->getIndex();
+      else {
+        DPtrMap[V] = cast<ConstantSDNode>(Loc)->getZExtValue();
+        SpillMap[V] = None;
+      }
     } else {
       // Record value as visited, but not spilled. This is case for allocas
       // and constants. For this values we can avoid emitting spill load while
@@ -629,10 +682,27 @@
     StatepointLowering.scheduleRelocCall(*Reloc);
 #endif
 
+  unsigned NumVRegs = 0;
+
+  if (UseRegistersForGCPointers) {
+    const unsigned MaxTiedRegs = 15U;
+
+    // Sort vectors so that elements which need relocation are laid out
+    // contiguously at the beginning of vectors.
+    // This is dictated by the StatepointSDNode implementation: due to size limit,
+    // one cannot put vector into class derived from SDNode, we can only keep a
+    // number of relocations. So N results of StatepointSDNode map 1-1 to the
+    // first N derived pointers.
+    NumVRegs = sortGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this);
+    NumVRegs = std::min(NumVRegs, MaxTiedRegs);
+  }
+
   // Lower statepoint vmstate and gcstate arguments
   SmallVector<SDValue, 10> LoweredMetaArgs;
   SmallVector<MachineMemOperand *, 16> MemRefs;
-  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
+  unsigned GCArgStart;
+  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, NumVRegs, GCArgStart, SI,
+                          *this);
 
   // Now that we've emitted the spills, we need to update the root so that the
   // call sequence is ordered correctly.
@@ -732,6 +802,7 @@
   pushStackMapConstant(Ops, *this, Flags);
 
   // Insert all vmstate and gcstate arguments
+  GCArgStart += Ops.size();
   Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());
 
   // Add register mask from call node
@@ -744,15 +815,10 @@
   if (Glue.getNode())
     Ops.push_back(Glue);
 
-  // Compute return values.  Provide a glue output since we consume one as
-  // input.  This allows someone else to chain off us as needed.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-  MachineSDNode *StatepointMCNode =
-      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
-  DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
+  SDValue STV =
+      DAG.getStatepoint(getCurSDLoc(), ReturnVal, GCArgStart, NumVRegs, Ops);
 
-  SDNode *SinkNode = StatepointMCNode;
+  SDNode *SinkNode = STV.getNode();
 
   // Build the GC_TRANSITION_END node if necessary.
   //
@@ -762,7 +828,7 @@
     SmallVector<SDValue, 8> TEOps;
 
     // Add chain
-    TEOps.push_back(SDValue(StatepointMCNode, 0));
+    TEOps.push_back(SDValue(STV.getNode(), STV->getNumValues() - 2));
 
     // Add GC transition arguments
     for (const Value *V : SI.GCTransitionArgs) {
@@ -772,7 +838,7 @@
     }
 
     // Add glue
-    TEOps.push_back(SDValue(StatepointMCNode, 1));
+    TEOps.push_back(SDValue(STV.getNode(), STV->getNumValues() - 1));
 
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
@@ -783,7 +849,12 @@
   }
 
   // Replace original call
-  DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
+  // Call: ch,glue = CALL ...
+  // Statepoint: [gc relocates],ch,glue = GC_STATEPOINT ...
+  unsigned NumSinkValues = SinkNode->getNumValues();
+  SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2),
+                                 SDValue(SinkNode, NumSinkValues - 1)};
+  DAG.ReplaceAllUsesWith(CallNode, StatepointValues);
 
   // Remove original call node
   DAG.DeleteNode(CallNode);
@@ -796,7 +867,7 @@
   // previously emitted STATEPOINT value.  Unfortunately, this doesn't appear
   // to actually be possible today.
 
-  return ReturnVal;
+  return STV;
 }
 
 void
@@ -870,21 +941,46 @@
   SI.NumPatchBytes = ISP.getNumPatchBytes();
   SI.EHPadBB = EHPadBB;
 
-  SDValue ReturnValue = LowerAsSTATEPOINT(SI);
+  SDValue STV = LowerAsSTATEPOINT(SI);
 
   // Export the result value if needed
+  const BasicBlock *BB = ISP.getCall()->getParent();
+  std::vector<const GCRelocateInst *> RV = ISP.getRelocates();
+  bool NeedExport = llvm::any_of(
+      RV, [&BB](const GCRelocateInst *I) { return I->getParent() != BB; });
+
+  // If any of relocates or result value will be used in different basic
+  // block, we need to export them manually. Default exporting mechanism
+  // will not work here because it is based on IR Value types, and
+  // IR statepoint has different type than the actual call or relocates.
+  // It means that by default llvm will create export register of the wrong
+  // type (always i32 - TokenTy - in our case). So instead we need to create
+  // export registers manually.
+  // TODO: To eliminate this problem we can remove gc.result/gc.relocate
+  // intrinsics completely and make statepoint call to return a tuple.
+  setValue(ISP.getInstruction(), STV);
+  if (NeedExport) {
+    LLVMContext *Context = DAG.getContext();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    const DataLayout &DL = DAG.getDataLayout();
+    for (unsigned i = 0, e = STV->getNumValues() - 2; i < e; ++i) {
+      Value *DerivedPtr = SI.GCRelocates[i]->getDerivedPtr();
+      SDValue Res(STV.getNode(), i);
+      Type *Ty = DerivedPtr->getType();
+      unsigned Reg = FuncInfo.CreateRegs(Ty);
+      RegsForValue RFV(*Context, TLI, DL, Reg, Ty, None);
+      SDValue Chain = DAG.getEntryNode();
+
+      RFV.getCopyToRegs(Res, DAG, getCurSDLoc(), Chain, nullptr, DerivedPtr);
+      PendingExports.push_back(Chain);
+      FuncInfo.StatepointRegs[SI.StatepointInstr].push_back(Reg);
+    }
+  }
 
   const GCResultInst *GCResult = ISP.getGCResult();
   Type *RetTy = ISP.getActualReturnType();
   if (!RetTy->isVoidTy() && GCResult) {
     if (GCResult->getParent() != ISP.getCall()->getParent()) {
-      // Result value will be used in a different basic block so we need to
-      // export it now.  Default exporting mechanism will not work here because
-      // statepoint call has a different type than the actual call.  It means
-      // that by default llvm will create export register of the wrong type
-      // (always i32 in our case). So instead we need to create export register
-      // with correct type manually.
-      // TODO: To eliminate this problem we can remove gc.result intrinsics
-      // completely and make statepoint call to return a tuple.
+      SDValue ReturnValue = cast<StatepointSDNode>(STV)->getActualRetVal();
       unsigned Reg = FuncInfo.CreateRegs(RetTy);
       RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                        DAG.getDataLayout(), Reg, RetTy,
@@ -894,16 +990,7 @@
       RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
 
       PendingExports.push_back(Chain);
       FuncInfo.ValueMap[ISP.getInstruction()] = Reg;
-    } else {
-      // Result value will be used in a same basic block. Don't export it or
-      // perform any explicit register copies.
-      // We'll replace the actuall call node shortly. gc_result will grab
-      // this value.
-      setValue(ISP.getInstruction(), ReturnValue);
     }
-  } else {
-    // The token value is never used from here on, just generate a poison value
-    setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc()));
   }
 }
 
@@ -934,7 +1021,9 @@
 
   // NB! The GC arguments are deliberately left empty.
 
-  if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
+  SDValue STV = LowerAsSTATEPOINT(SI);
+  if (SDValue ReturnVal =
+          cast<StatepointSDNode>(STV.getNode())->getActualRetVal()) {
     ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
     setValue(Call, ReturnVal);
   }
@@ -968,17 +1057,19 @@
     assert(CopyFromReg.getNode());
     setValue(&CI, CopyFromReg);
   } else {
-    setValue(&CI, getValue(I));
+    StatepointSDNode *STN = cast<StatepointSDNode>(getValue(I));
+    setValue(&CI, STN->getActualRetVal());
   }
 }
 
 void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
+  const BasicBlock *StatepointBB = Relocate.getStatepoint()->getParent();
 #ifndef NDEBUG
   // Consistency check
   // We skip this check for relocates not in the same basic block as their
   // statepoint. It would be too expensive to preserve validation info through
   // different basic blocks.
-  if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
+  if (StatepointBB == Relocate.getParent())
     StatepointLowering.relocCallVisited(Relocate);
 
   auto *Ty = Relocate.getType()->getScalarType();
@@ -994,6 +1085,33 @@
   assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
   Optional<int> DerivedPtrLocation = SlotIt->second;
 
+  auto &DPtrMap = FuncInfo.DerivedPtrMap[Relocate.getStatepoint()];
+  auto It = DPtrMap.find(Relocate.getDerivedPtr());
+  if (It != DPtrMap.end()) {
+    // This GC ptr is lowered through VReg.
+    unsigned Index = It->second;
+    SDValue Result;
+    auto &StatepointRegs = FuncInfo.StatepointRegs[Relocate.getStatepoint()];
+    if (StatepointBB != Relocate.getParent()) {
+      // Statepoint is in different basic block. Default getValue() mechanism
+      // does not work here, so we need create CopyFromRegs manually.
+      // See comment in LowerStatepoint for details.
+      assert(Index < StatepointRegs.size());
+      unsigned InReg = StatepointRegs[Index];
+      RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+                       DAG.getDataLayout(), InReg, DerivedPtr->getType(),
+                       None); // This is not an ABI copy.
+      SDValue Chain = DAG.getEntryNode();
+      Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+                                   DerivedPtr);
+    } else {
+      SDNode *Statepoint = getValue(Relocate.getStatepoint()).getNode();
+      Result = SDValue(Statepoint, Index);
+    }
+    setValue(&Relocate, Result);
+    return;
+  }
+
   // We didn't need to spill these special cases (constants and allocas).
   // See the handling in spillIncomingValueForStatepoint for detail.
   if (!DerivedPtrLocation) {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1014,11 +1014,10 @@
   // STATEPOINT Deopt Alloca - live-through, read only, direct
   // (We're currently conservative and mark the deopt slots read/write in
   // practice.)
-  // STATEPOINT GC Spill   - live-through, read/write, indirect
+  // STATEPOINT GC Spill   - live-through, read/write, indirect or vreg
   // STATEPOINT GC Alloca  - live-through, read/write, direct
-  // The live-in vs live-through is handled already (the live through ones are
-  // all stack slots), but we need to handle the different type of stackmap
-  // operands and memory effects here.
+  // The live-in vs live-through is handled already but we need to handle
+  // the different type of stackmap operands and memory effects here.
 
   // MI changes inside this loop as we grow operands.
   for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
@@ -1031,9 +1030,19 @@
     int FI = MO.getIndex();
     MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
 
+    auto transferOperand = [MIB, MI](unsigned i) {
+      const MachineOperand &O = MI->getOperand(i);
+      unsigned TiedTo = i;
+      if (O.isReg() && O.isTied())
+        TiedTo = MI->findTiedOperandIdx(i);
+      MIB.add(O);
+      if (TiedTo < i)
+        MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
+    };
+
     // Copy operands before the frame-index.
     for (unsigned i = 0; i < OperIdx; ++i)
-      MIB.add(MI->getOperand(i));
+      transferOperand(i);
     // Add frame index operands recognized by stackmaps.cpp
     if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
       // indirect-mem-ref tag, size, #FI, offset.
@@ -1054,7 +1063,7 @@
     }
     // Copy the operands after the frame index.
     for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
-      MIB.add(MI->getOperand(i));
+      transferOperand(i);
 
     // Inherit previous memory operands.
     MIB.cloneMemRefs(*MI);