diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -727,6 +727,10 @@ // since we don't want to trust live-in lists. if (MBB.succ_empty()) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { + // Skip COPYs used as part of FrameDestroy, since the code here just + // assumes nothing is live-out in a block without successors... + if (MaybeDead->getFlag(MachineInstr::FrameDestroy)) + continue; LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -464,6 +464,10 @@ // Calculate offsets. for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { + // TODO(review): entries saved by copy to a register are skipped here, so no frame offset is computed for them; presumably their unwind info still needs to be described some other way - confirm. + if (I->isSpilledToReg()) + continue; + + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); @@ -1493,32 +1497,37 @@ if (IsFunclet) NumBytes = getWinEHFuncletFrameSize(MF); - // Skip the callee-saved push instructions. + // Skip the callee-saved push and copy instructions. bool PushedRegs = false; int StackOffset = 2 * stackGrowth; while (MBBI != MBB.end() && - MBBI->getFlag(MachineInstr::FrameSetup) && - (MBBI->getOpcode() == X86::PUSH32r || - MBBI->getOpcode() == X86::PUSH64r)) { + MBBI->getFlag(MachineInstr::FrameSetup)) { + unsigned Opcode = MBBI->getOpcode(); + if (Opcode != X86::PUSH32r && Opcode != X86::PUSH64r && + Opcode != TargetOpcode::COPY) { + break; + } PushedRegs = true; Register Reg = MBBI->getOperand(0).getReg(); ++MBBI; - if (!HasFP && NeedsDwarfCFI) { - // Mark callee-saved push instruction. - // Define the current CFA rule to use the provided offset. 
- assert(StackSize); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset)); - StackOffset += stackGrowth; - } + if (Opcode != TargetOpcode::COPY) { + if (!HasFP && NeedsDwarfCFI) { + // Mark callee-saved push instruction. + // Define the current CFA rule to use the provided offset. + assert(StackSize); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset)); + StackOffset += stackGrowth; + } - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) - .addImm(Reg) - .setMIFlag(MachineInstr::FrameSetup); + if (NeedsWinCFI) { + HasWinCFI = true; + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(Reg) + .setMIFlag(MachineInstr::FrameSetup); + } } } @@ -1987,7 +1996,8 @@ if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && - (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy))) + (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != TargetOpcode::COPY || !PI->getFlag(MachineInstr::FrameDestroy))) break; FirstCSPop = PI; } @@ -2355,6 +2365,8 @@ } } + unsigned lastUsedXmm = 0; + // Assign slots for GPRs. It increases frame size. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i - 1].getReg(); @@ -2362,6 +2374,27 @@ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; + // Try to save this GR64 register (other than RBP) in an FR64 (XMM) register that is not marked used, instead of a stack slot; not attempted when the function calls unwind init. 
+ if (Reg != X86::RBP && X86::GR64RegClass.contains(Reg) && !MF.callsUnwindInit()) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + MCRegister SpillReg = MCRegister::NoRegister; + for (unsigned NumRegs = X86::FR64RegClass.getNumRegs(); + lastUsedXmm < NumRegs; lastUsedXmm++) { + MCRegister Candidate = X86::FR64RegClass.getRegister(lastUsedXmm); + if (!MRI.isPhysRegUsed(Candidate)) { + SpillReg = Candidate; + lastUsedXmm++; + break; + } + } + if (SpillReg != MCRegister::NoRegister) { + LLVM_DEBUG(dbgs() << "Save " << printReg(Reg, TRI) + << " by copy to " << printReg(SpillReg, TRI) << '\n'); + CSI[i - 1].setDstReg(SpillReg); + continue; + } + } + SpillSlotOffset -= SlotSize; CalleeSavedFrameSize += SlotSize; @@ -2420,7 +2453,8 @@ const MachineFunction &MF = *MBB.getParent(); unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i - 1].getReg(); + const CalleeSavedInfo& CI = CSI[i - 1]; + unsigned Reg = CI.getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; @@ -2442,6 +2476,14 @@ } } + if (CI.isSpilledToReg()) { + BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), + CI.getDstReg()) + .addReg(Reg, getKillRegState(CanKill)) + .setMIFlag(MachineInstr::FrameSetup); + continue; + } + // Do not set a kill flag on values that are also marked as live-in. This // happens with the @llvm-returnaddress intrinsic and with arguments // passed in callee saved registers. @@ -2549,12 +2591,19 @@ // POP GPRs. unsigned Opc = STI.is64Bit() ? 
X86::POP64r : X86::POP32r; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo& CI : CSI) { + unsigned Reg = CI.getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; + if (CI.isSpilledToReg()) { + BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), Reg) + .addReg(CI.getDstReg(), RegState::Kill) + .setMIFlag(MachineInstr::FrameDestroy); + continue; + } + BuildMI(MBB, MI, DL, TII.get(Opc), Reg) .setMIFlag(MachineInstr::FrameDestroy); }