Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -151,13 +151,13 @@ /// Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we /// can use this information below to update aliases. - bool joinIntervals(CoalescerPair &CP); + bool joinIntervals(CoalescerPair &CP, MachineInstr &CopyMI); /// Attempt joining two virtual registers. Return true on success. bool joinVirtRegs(CoalescerPair &CP); /// Attempt joining with a reserved physreg. - bool joinReservedPhysReg(CoalescerPair &CP); + bool joinReservedPhysReg(CoalescerPair &CP, MachineInstr &CopyMI); /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. /// Subranges in @p LI which only partially interfere with the desired @@ -1647,7 +1647,7 @@ // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have // been modified, so we can use this information below to update aliases. - if (!joinIntervals(CP)) { + if (!joinIntervals(CP, *CopyMI)) { // Coalescing failed. // If definition of source is defined by trivial computation, try @@ -1742,7 +1742,8 @@ return true; } -bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { +bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP, + MachineInstr &CopyMI) { unsigned DstReg = CP.getDstReg(); unsigned SrcReg = CP.getSrcReg(); assert(CP.isPhys() && "Must be a physreg copy"); @@ -1750,7 +1751,8 @@ LiveInterval &RHS = LIS->getInterval(SrcReg); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); - assert(RHS.containsOneValue() && "Invalid join with reserved register"); + if (!MRI->hasOneDef(SrcReg)) + return false; // Optimization for reserved registers like ESP. We can only merge with a // reserved physreg if RHS has a single value that is a copy of DstReg. @@ -1786,8 +1788,6 @@ // register live range doesn't need to be accurate as long as all the // defs are there. - // Delete the identity copy. - MachineInstr *CopyMI; if (CP.isFlipped()) { // Physreg is copied into vreg // %vregY = COPY %X @@ -1796,7 +1796,8 @@ // => // ... // use %X - CopyMI = MRI->getVRegDef(SrcReg); + + // We already tested for definitions of DstReg above. } else { // VReg is copied into physreg: // %vregX = def @@ -1805,31 +1806,39 @@ // => // %Y = def // ... - if (!MRI->hasOneNonDBGUse(SrcReg)) { - DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); - return false; - } + const MachineOperand &DefMO = *MRI->def_begin(SrcReg); + const MachineInstr &DestMI = *DefMO.getParent(); + SlotIndex DestRegIdx = + LIS->getInstructionIndex(DestMI).getRegSlot(DefMO.isEarlyClobber()); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); - if (!LIS->intervalIsInOneMBB(RHS)) { - DEBUG(dbgs() << "\t\tComplex control flow!\n"); - return false; - } + if (!MRI->isConstantPhysReg(DstReg)) { + // We do not track exact physreg liveranges so this is only valid within + // a basic block. Otherwise we may miss reads on other CFG paths. + if (!LIS->intervalIsInOneMBB(RHS)) + return false; - MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); - CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); - SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); - SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + assert(RHS.size() == 1 && "Liverange must be a single segment"); - if (!MRI->isConstantPhysReg(DstReg)) { // We checked above that there are no interfering defs of the physical // register. However, for this case, where we intent to move up the def of - // the physical register, we also need to check for interfering uses. - SlotIndexes *Indexes = LIS->getSlotIndexes(); - for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); - SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { - MachineInstr *MI = LIS->getInstructionFromIndex(SI); - if (MI->readsRegister(DstReg, TRI)) { - DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + // the physical register, we also need to check that there are no uses + // of DstReg during the lifetime of SrcReg. + SlotIndexes &Indexes = *LIS->getSlotIndexes(); + // Check instructions after the definition, for early clobber defs even + // the defining instruction. + const LiveRange::Segment &S = *RHS.begin(); + SlotIndex Start = + DefMO.isEarlyClobber() ? S.start : Indexes.getNextNonNullIndex(S.start); + + assert(Start <= CopyIdx && CopyIdx <= S.end && "valid segment"); + + // Check instructions in range for uses of DstReg. + for (SlotIndex SI = Start; SI < CopyIdx; + SI = Indexes.getNextNonNullIndex(SI)) { + MachineInstr &MI = *LIS->getInstructionFromIndex(SI); + if (MI.readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << MI); return false; } } @@ -1838,9 +1847,9 @@ // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) - << " at " << CopyRegIdx << "\n"); + << " at " << CopyIdx << "\n"); - LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + LIS->removePhysRegDefAt(DstReg, CopyIdx); // Create a new dead def at the new def location. for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { LiveRange &LR = LIS->getRegUnit(*UI); @@ -1848,8 +1857,9 @@ } } - LIS->RemoveMachineInstrFromMaps(*CopyMI); - CopyMI->eraseFromParent(); + // Delete the identity copy. + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI.eraseFromParent(); // We don't track kills for reserved registers. MRI->clearKillFlags(CP.getSrcReg()); @@ -3073,8 +3083,8 @@ return true; } -bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { - return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); +bool RegisterCoalescer::joinIntervals(CoalescerPair &CP, MachineInstr &CopyMI) { + return CP.isPhys() ? joinReservedPhysReg(CP, CopyMI) : joinVirtRegs(CP); } namespace { Index: test/CodeGen/AArch64/regcoal-physreg.mir =================================================================== --- test/CodeGen/AArch64/regcoal-physreg.mir +++ test/CodeGen/AArch64/regcoal-physreg.mir @@ -86,6 +86,22 @@ %9 : gpr64sp = SUBXri %fp, 8, 0 STRXui %fp, %fp, 0 %fp = COPY %9 + + ; Multiple uses should not stop coalescing. + ; CHECK: %fp = SUBXri %fp, 12, 0 + ; CHECK: STRXui undef %x0, %fp, 0 + ; CHECK-NOT: %p = COPY + %10 : gpr64sp = SUBXri %fp, 12, 0 + STRXui undef %x0, %10, 0 + %fp = COPY %10 + + ; Cannot coalesce when vreg definition is an earlyclobber slot and the + ; physreg is read in the same instruction. + ; CHECK-NOT: HINT 0, implicit %fp, implicit-def early-clobber %fp + ; CHECK: HINT 0, implicit %fp, implicit-def early-clobber %11 + ; CHECK: %fp = COPY %11 + HINT 0, implicit %fp, implicit-def early-clobber %11 : gpr64 + %fp = COPY %11 ... --- # Check coalescing of COPYs from reserved physregs. Index: test/CodeGen/SPARC/2013-05-17-CallFrame.ll =================================================================== --- test/CodeGen/SPARC/2013-05-17-CallFrame.ll +++ test/CodeGen/SPARC/2013-05-17-CallFrame.ll @@ -8,8 +8,8 @@ ; V8: add %i0, 7, %i0 ; V8-NEXT: and %i0, -8, %i0 ; V8-NEXT: add %i0, 8, %i0 -; V8-NEXT: sub %sp, %i0, %i0 -; V8-NEXT: add %i0, 96, %o0 +; V8-NEXT: sub %sp, %i0, %sp +; V8-NEXT: add %sp, 96, %o0 ; V8: add %sp, -16, %sp ; V8: call foo ; V8: add %sp, 16, %sp