Index: lib/CodeGen/VirtRegMap.cpp
===================================================================
--- lib/CodeGen/VirtRegMap.cpp
+++ lib/CodeGen/VirtRegMap.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -152,6 +153,20 @@
 // according to LiveIntervals.
 //
 namespace {
+
+// Used to record a register renaming done by copyProp, used by
+// fixupKillFlagsAfterCopyProp to fix kill flags.
+struct RegRename {
+  RegRename(unsigned DstVirtReg, unsigned SrcReg)
+      : DstVirtReg(DstVirtReg), SrcReg(SrcReg) {}
+
+  unsigned DstVirtReg;
+  unsigned SrcReg; // Can be virtual or physical.
+};
+
+// Map from physical regunit to list of def SlotIndexes.
+using PhysDefsMap = DenseMap<unsigned, SmallVector<SlotIndex, 4>>;
+
 class VirtRegRewriter : public MachineFunctionPass {
   MachineFunction *MF;
   const TargetMachine *TM;
@@ -161,8 +176,11 @@
   SlotIndexes *Indexes;
   LiveIntervals *LIS;
   VirtRegMap *VRM;
+  MachineBlockFrequencyInfo *MBFI;
 
   void rewrite();
+  void copyProp(SmallVectorImpl<RegRename> &);
+  void fixupKillFlagsAfterCopyProp(const SmallVectorImpl<RegRename> &);
   void addMBBLiveIns();
   bool readsUndefSubreg(const MachineOperand &MO) const;
   void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
@@ -191,6 +209,7 @@
 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
 INITIALIZE_PASS_DEPENDENCY(LiveStacks)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
                     "Virtual Register Rewriter", false, false)
 
@@ -205,6 +224,8 @@
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
   AU.addRequired<VirtRegMap>();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addPreserved<MachineBlockFrequencyInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -217,14 +238,23 @@
   Indexes = &getAnalysis<SlotIndexes>();
   LIS = &getAnalysis<LiveIntervals>();
   VRM = &getAnalysis<VirtRegMap>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
                << "********** Function: " << MF->getName() << '\n');
   DEBUG(VRM->dump());
 
+  // RegRenames records the register renamings done by copyProp for use by
+  // fixupKillFlagsAfterCopyProp in fixing up kill flags on renamed registers.
+  SmallVector<RegRename, 16> RegRenames;
+  copyProp(RegRenames);
+
   // Add kill flags while we still have virtual registers.
   LIS->addKillFlags(VRM);
 
+  // Remove kill flags to take into account register renamings done by copyProp.
+  fixupKillFlagsAfterCopyProp(RegRenames);
+
   // Live-in lists on basic blocks are required for physregs.
   addMBBLiveIns();
@@ -241,6 +271,246 @@
   return true;
 }
 
+static unsigned getPhysReg(const VirtRegMap &VRM, unsigned Reg) {
+  return TargetRegisterInfo::isVirtualRegister(Reg) ? VRM.getPhys(Reg) : Reg;
+}
+
+// Record PhysReg as being defined at every Index in LI at which a def occurs.
+static void addMappedPhysDefs(PhysDefsMap &MappedPhysDefs,
+                              const LiveInterval &LI, unsigned PhysReg,
+                              const TargetRegisterInfo &TRI) {
+  for (MCRegUnitIterator Units(PhysReg, &TRI); Units.isValid(); ++Units) {
+    SmallVectorImpl<SlotIndex> &DefIndexes = MappedPhysDefs[*Units];
+    for (const LiveRange::Segment &S : LI.segments)
+      DefIndexes.push_back(S.start);
+  }
+}
+
+// Record PhysReg as no longer being defined at every Index in LI at which a
+// def occurs.
+static void removeMappedPhysDefs(PhysDefsMap &MappedPhysDefs,
+                                 const LiveInterval &LI, unsigned PhysReg,
+                                 const TargetRegisterInfo &TRI) {
+  for (MCRegUnitIterator Units(PhysReg, &TRI); Units.isValid(); ++Units) {
+    SmallVectorImpl<SlotIndex> &DefIndexes = MappedPhysDefs[*Units];
+    for (const LiveRange::Segment &S : LI.segments) {
+      bool FoundDef = false;
+      for (auto DI = DefIndexes.begin(); DI != DefIndexes.end(); ++DI)
+        if (*DI == S.start) {
+          FoundDef = true;
+          std::iter_swap(DI, DefIndexes.end() - 1);
+          DefIndexes.pop_back();
+          break;
+        }
+      assert(FoundDef && "Couldn't find def to remove.");
+    }
+  }
+}
+
+// Record the Indexes where every allocated physical register is defined.
+static void buildMappedPhysDefs(PhysDefsMap &MappedPhysDefs,
+                                const MachineRegisterInfo &MRI,
+                                const LiveIntervals &LIS,
+                                const TargetRegisterInfo &TRI,
+                                const VirtRegMap &VRM) {
+  // Build the def index list for each physical reg unit allocated to a
+  // virtual register by scanning all live ranges.
+  for (size_t VI = 0, VE = MRI.getNumVirtRegs(); VI < VE; ++VI) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(VI);
+    if (!LIS.hasInterval(VReg))
+      continue;
+
+    unsigned VIPhysReg = VRM.getPhys(VReg);
+    if (VIPhysReg == VirtRegMap::NO_PHYS_REG)
+      continue;
+
+    const LiveInterval &LI = LIS.getInterval(VReg);
+    addMappedPhysDefs(MappedPhysDefs, LI, VIPhysReg, TRI);
+  }
+}
+
+// Look for any LiveRanges that clobber PhysReg over the given live interval.
+static bool isClobberedOverInterval(const LiveInterval &LI, unsigned PhysReg,
+                                    PhysDefsMap &MappedPhysDefs,
+                                    const MachineRegisterInfo &MRI,
+                                    const LiveIntervals &LIS,
+                                    const TargetRegisterInfo &TRI,
+                                    const VirtRegMap &VRM) {
+
+  for (MCRegUnitIterator Units(PhysReg, &TRI); Units.isValid(); ++Units) {
+    // Check physical reg live intervals.
+    if (const LiveRange *PLR = LIS.getCachedRegUnit(*Units))
+      for (auto SegI = PLR->begin(), SegE = PLR->end(); SegI != SegE; ++SegI)
+        if (LI.liveAt(SegI->start))
+          return true;
+
+    // Check virtual reg live intervals that are assigned to the phys reg.
+    if (MappedPhysDefs.empty())
+      buildMappedPhysDefs(MappedPhysDefs, MRI, LIS, TRI, VRM);
+    auto MLRIt = MappedPhysDefs.find(*Units);
+    if (MLRIt != MappedPhysDefs.end())
+      for (const SlotIndex &DefIndex : MLRIt->second)
+        if (LI.liveAt(DefIndex))
+          return true;
+  }
+
+  // Check regmask clobbers.
+  const ArrayRef<SlotIndex> RegMaskSlots = LIS.getRegMaskSlots();
+  const ArrayRef<const uint32_t *> RegMaskBits = LIS.getRegMaskBits();
+  SlotIndex BeginIndex = LI.beginIndex();
+  SlotIndex EndIndex = LI.endIndex();
+  unsigned I = 0, E = RegMaskSlots.size();
+  // Skip over regmasks that are before LI starts.
+  for (; I != E && RegMaskSlots[I] < BeginIndex; ++I)
+    ;
+  // Stop once we reach a regmask that is after LI ends.
+  for (; I != E && RegMaskSlots[I] < EndIndex; ++I)
+    if (MachineOperand::clobbersPhysReg(RegMaskBits[I], PhysReg) &&
+        LI.liveAt(RegMaskSlots[I]))
+      return true;
+
+  return false;
+}
+
+// Look for COPY instructions that can be made unnecessary by propagating their
+// source value to all users.
+void VirtRegRewriter::copyProp(SmallVectorImpl<RegRename> &RegRenames) {
+  PhysDefsMap MappedPhysDefs;
+
+  for (size_t VI = 0, VE = MRI->getNumVirtRegs(); VI < VE; ++VI) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(VI);
+    if (!LIS->hasInterval(VReg))
+      continue;
+    const LiveInterval &LI = LIS->getInterval(VReg);
+
+    // Don't consider intervals not assigned to a physical register.
+    if (VRM->getPhys(LI.reg) == VirtRegMap::NO_PHYS_REG)
+      continue;
+
+    // Only consider intervals that have a single def that is a full copy
+    // instruction.
+    if (!LI.containsOneValue())
+      continue;
+    const MachineInstr *MI =
+        Indexes->getInstructionFromIndex(LI.getValNumInfo(0)->def);
+    if (!MI || !MI->isFullCopy())
+      continue;
+
+    const MachineOperand &CopyDst = MI->getOperand(0);
+    unsigned DstReg = CopyDst.getReg();
+
+    // Only consider renaming virtual registers.
+    if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+      continue;
+
+    const MachineOperand &CopySrc = MI->getOperand(1);
+    unsigned SrcReg = CopySrc.getReg();
+
+    unsigned DstPhysReg = getPhysReg(*VRM, DstReg);
+    unsigned SrcPhysReg = getPhysReg(*VRM, SrcReg);
+
+    // This copy will already be removed by handleIdentityCopy.
+    if (SrcPhysReg == DstPhysReg)
+      continue;
+    // Check that the allocated src reg is in the regclass of the dst reg.
+    if (!MRI->getRegClass(DstReg)->contains(SrcPhysReg))
+      continue;
+
+    bool IsSrcConstant = !TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+                         MRI->isConstantPhysReg(SrcPhysReg, *MF);
+
+    // Skip COPY from a reserved reg unless it is a known constant.
+    if (MRI->isReserved(SrcPhysReg) && !IsSrcConstant)
+      continue;
+
+    // Check the cost/benefit of renaming.
+    BlockFrequency DefFreq = MBFI->getBlockFreq(MI->getParent());
+    BlockFrequency Benefit(DefFreq);
+    BlockFrequency Cost(0);
+    for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(DstReg)) {
+      const MachineInstr &OpndInst = *Opnd.getParent();
+      if (Opnd.isDef()) {
+        assert(&OpndInst == MI);
+        continue;
+      }
+      if (OpndInst.isFullCopy() && &OpndInst.getOperand(1) == &Opnd) {
+        BlockFrequency OpndFreq = MBFI->getBlockFreq(OpndInst.getParent());
+        unsigned OpndDstPhysReg =
+            getPhysReg(*VRM, OpndInst.getOperand(0).getReg());
+        unsigned OpndSrcPhysReg =
+            getPhysReg(*VRM, OpndInst.getOperand(1).getReg());
+        // This COPY would be removed if we don't rename.
+        if (OpndDstPhysReg == OpndSrcPhysReg)
+          Cost += OpndFreq;
+        // Renaming would cause this COPY to also be removed.
+        if (OpndDstPhysReg == SrcPhysReg)
+          Benefit += OpndFreq;
+      }
+    }
+    if (Cost >= Benefit)
+      continue;
+
+    // Check that the source register isn't clobbered at any point that the
+    // destination register is live. We don't need to check for constant
+    // registers since they can't be clobbered by definition.
+    if (!IsSrcConstant &&
+        isClobberedOverInterval(LI, SrcPhysReg, MappedPhysDefs, *MRI,
+                                *LIS, *TRI, *VRM))
+      continue;
+
+    DEBUG(dbgs() << "Copyprop reassigning " << PrintReg(DstReg) << " to "
+                 << PrintReg(SrcPhysReg, TRI) << "\n");
+    VRM->clearVirt(DstReg);
+    VRM->assignVirt2Phys(DstReg, SrcPhysReg);
+    RegRenames.emplace_back(DstReg, SrcReg);
+
+    // Update MappedPhysDefs since we are now clobbering SrcPhysReg at this
+    // COPY and no longer clobbering DstPhysReg.
+    // Don't need to update MappedPhysDefs if we haven't built it yet, which
+    // can be the case if we're renaming to a constant physical reg.
+    if (!MappedPhysDefs.empty())
+      removeMappedPhysDefs(MappedPhysDefs, LI, DstPhysReg, *TRI);
+    // Don't need to update constant physical reg defs since we never check
+    // for them.
+    if (!IsSrcConstant)
+      addMappedPhysDefs(MappedPhysDefs, LI, SrcPhysReg, *TRI);
+  }
+}
+
+// Clear kill flags on uses of remapped registers that are no longer accurate.
+// For example, in the following code:
+//
+//   vr2 = COPY vr1
+//   ...
+//   OP vr1
+//   ...
+//   OP vr2
+//
+// since vr2 and vr1 are now mapped to the same register, the kill flag on vr1
+// must be removed.
+void VirtRegRewriter::fixupKillFlagsAfterCopyProp(
+    const SmallVectorImpl<RegRename> &RegRenames) {
+
+  auto clearOverlappingKill = [](MachineRegisterInfo &MRI, LiveIntervals &LIS,
+                                 unsigned Reg1, unsigned Reg2) {
+    LiveInterval &LI = LIS.getInterval(Reg1);
+    for (MachineOperand &Use : MRI.use_operands(Reg2)) {
+      if (Use.isKill()) {
+        SlotIndex UseIndex = LIS.getInstructionIndex(*Use.getParent());
+        if (LI.liveAt(UseIndex))
+          Use.setIsKill(false);
+      }
+    }
+  };
+
+  for (const RegRename &R : RegRenames)
+    if (TargetRegisterInfo::isVirtualRegister(R.SrcReg)) {
+      clearOverlappingKill(*MRI, *LIS, R.SrcReg, R.DstVirtReg);
+      clearOverlappingKill(*MRI, *LIS, R.DstVirtReg, R.SrcReg);
+    }
+}
+
 void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
                                              unsigned PhysReg) const {
   assert(!LI.empty());
@@ -400,7 +670,9 @@
       unsigned PhysReg = VRM->getPhys(VirtReg);
       assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
              "Instruction uses unmapped VirtReg");
-      assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+      assert((!MRI->isReserved(PhysReg) ||
+              MRI->isConstantPhysReg(PhysReg, *MF)) &&
+             "Reserved register assignment");
 
       // Preserve semantics of sub-register operands.
       unsigned SubReg = MO.getSubReg();
Index: test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
===================================================================
--- test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
+++ test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
@@ -5,12 +5,11 @@
 ; The verifier would complain otherwise.
 define i64 @csed-impdef-killflag(i64 %a) {
 ; CHECK-LABEL: csed-impdef-killflag
-; CHECK-DAG: mov [[REG0:w[0-9]+]], wzr
 ; CHECK-DAG: orr [[REG1:w[0-9]+]], wzr, #0x1
 ; CHECK-DAG: orr [[REG2:x[0-9]+]], xzr, #0x2
 ; CHECK-DAG: orr [[REG3:x[0-9]+]], xzr, #0x3
 ; CHECK: cmp x0, #0
-; CHECK-DAG: csel w[[SELECT_WREG_1:[0-9]+]], [[REG0]], [[REG1]], ne
+; CHECK-DAG: csel w[[SELECT_WREG_1:[0-9]+]], wzr, [[REG1]], ne
 ; CHECK-DAG: csel [[SELECT_XREG_2:x[0-9]+]], [[REG2]], [[REG3]], ne
 ; CHECK: ubfx [[SELECT_XREG_1:x[0-9]+]], x[[SELECT_WREG_1]], #0, #32
 ; CHECK-NEXT: add x0, [[SELECT_XREG_2]], [[SELECT_XREG_1]]
Index: test/CodeGen/X86/ipra-local-linkage.ll
===================================================================
--- test/CodeGen/X86/ipra-local-linkage.ll
+++ test/CodeGen/X86/ipra-local-linkage.ll
@@ -24,7 +24,7 @@
   call void @foo()
   ; CHECK-LABEL: bar:
   ; CHECK: callq foo
-  ; CHECK-NEXT: movl %eax, %r15d
+  ; CHECK-NEXT: movl %edi, %r15d
   call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X)
   ret void
 }
Index: test/CodeGen/X86/mul-i1024.ll
===================================================================
--- test/CodeGen/X86/mul-i1024.ll
+++ test/CodeGen/X86/mul-i1024.ll
@@ -4296,18 +4296,16 @@
 ; X64-NEXT: movq %rax, %r13
 ; X64-NEXT: adcq %rbp, %rcx
 ; X64-NEXT: movq %rcx, %rbp
-; X64-NEXT: movq %r11, %rcx
-; X64-NEXT: movq (%rcx), %r8
+; X64-NEXT: movq (%r11), %r8
 ; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq %r12, %rsi
-; X64-NEXT: mulq %rsi
+; X64-NEXT: mulq %r12
 ; X64-NEXT: movq %rax, %rdi
 ; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: movq %rdx, %r10
 ; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT: movq 8(%rcx), %r11
+; X64-NEXT: movq 8(%r11), %r11
 ; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %rsi
+; X64-NEXT: mulq %r12
 ; X64-NEXT: movq %rax, %rsi
 ; X64-NEXT: addq %rdi, %rsi
 ; X64-NEXT: adcq $0, %rdx
@@ -4359,8 +4357,7 @@
 ; X64-NEXT: addq %r9, %rbx
 ; X64-NEXT: adcq %rdx, %rsi
 ; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq %r8, %r15
-; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: xorl %ecx, %ecx
 ; X64-NEXT: mulq %rcx
 ; X64-NEXT: movq %rax, %rdi
@@ -4368,8 +4365,7 @@
 ; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
 ; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
 ; X64-NEXT: addq %rbx, %rdi
 ; X64-NEXT: adcq %rsi, %rcx
@@ -4383,7 +4379,7 @@
 ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload
 ; X64-NEXT: sbbq %rax, %rax
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %r8, %rcx
 ; X64-NEXT: imulq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
 ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
 ; X64-NEXT: imulq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
@@ -4723,8 +4719,7 @@
 ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
 ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT: movq %r9, %rsi
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %r9, %rax
 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT: mulq %rcx
 ; X64-NEXT: movq %rdx, %r14
@@ -4735,7 +4730,7 @@
 ; X64-NEXT: movq %rax, %rcx
 ; X64-NEXT: addq %r8, %rcx
 ; X64-NEXT: adcq $0, %rbx
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %r9, %rax
 ; X64-NEXT: mulq {{[0-9]+}}(%rsp) # 8-byte Folded Reload
 ; X64-NEXT: movq %rdx, %rsi
 ; X64-NEXT: movq %rax, %r15
@@ -5136,15 +5131,14 @@
 ; X64-NEXT: movq %r9, %rsi
 ; X64-NEXT: movq 64(%rsi), %rax
 ; X64-NEXT: movq %rax, %r13
-; X64-NEXT: movq %r14, %rbp
-; X64-NEXT: mulq %rbp
+; X64-NEXT: mulq %r14
 ; X64-NEXT: movq %rax, %rbx
 ; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: movq 72(%rsi), %r8
 ; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %rbp
+; X64-NEXT: mulq %r14
 ; X64-NEXT: movq %rax, %rsi
 ; X64-NEXT: addq %rbx, %rsi
 ; X64-NEXT: adcq $0, %rdx
@@ -5310,8 +5304,8 @@
 ; X64-NEXT: adcq %rbx, %rdx
 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT: imulq %r12, %rbx
 ; X64-NEXT: movq %r12, %rax
-; X64-NEXT: imulq %rax, %rbx
 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT: mulq %rcx
 ; X64-NEXT: movq %rax, %rsi
Index: test/CodeGen/X86/mul-i512.ll
===================================================================
--- test/CodeGen/X86/mul-i512.ll
+++ test/CodeGen/X86/mul-i512.ll
@@ -899,9 +899,8 @@
 ; X64-NEXT: addq %rbx, %rcx
 ; X64-NEXT: sbbq %rax, %rax
 ; X64-NEXT: andl $1, %eax
-; X64-NEXT: movq %r15, %rbx
-; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT: movq %rbx, %rdx
+; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %r15, %rdx
 ; X64-NEXT: imulq %rbp, %rdx
 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: movq %rbp, %r12
@@ -919,7 +918,7 @@
 ; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT: movq %rax, %rsi
 ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %r15, %rax
 ; X64-NEXT: mulq %r14
 ; X64-NEXT: movq %rdx, %r14
 ; X64-NEXT: movq %rax, %rbx
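
Note for reviewers: the cost/benefit comparison inside copyProp is easier to see in isolation than in the middle of the pass, so here is a minimal standalone C++ sketch of just that heuristic, using plain integers in place of BlockFrequency. The names CopyUse and shouldRename, and the example frequencies, are hypothetical and exist only for this illustration; they are not part of the patch or of any LLVM API.

// Standalone illustration of the copyProp cost/benefit heuristic.
// CopyUse models one full-copy user of the candidate virtual register;
// plain integers stand in for BlockFrequency. None of this is LLVM API.
#include <cstdint>
#include <iostream>
#include <vector>

struct CopyUse {
  uint64_t Freq;       // Block frequency of the using COPY.
  unsigned DstPhysReg; // Physical register assigned to the COPY's dest.
  unsigned SrcPhysReg; // Physical register currently feeding the COPY.
};

// Decide whether remapping the candidate register to SrcPhysReg pays off.
// DefFreq is the frequency of the defining COPY, which becomes an identity
// copy (and is removed) whenever we rename.
bool shouldRename(uint64_t DefFreq, unsigned SrcPhysReg,
                  const std::vector<CopyUse> &Uses) {
  uint64_t Benefit = DefFreq; // The defining COPY disappears.
  uint64_t Cost = 0;
  for (const CopyUse &U : Uses) {
    // A user COPY that is already an identity copy would stop being one if
    // we rename, so renaming costs us that copy.
    if (U.DstPhysReg == U.SrcPhysReg)
      Cost += U.Freq;
    // A user COPY whose destination is already SrcPhysReg becomes an
    // identity copy after renaming, so renaming also removes it.
    if (U.DstPhysReg == SrcPhysReg)
      Benefit += U.Freq;
  }
  return Benefit > Cost;
}

int main() {
  // One user COPY that would become an identity copy after renaming, and one
  // that would stop being an identity copy.
  std::vector<CopyUse> Uses = {{8, /*Dst*/ 3, /*Src*/ 5},
                               {2, /*Dst*/ 7, /*Src*/ 7}};
  std::cout << std::boolalpha
            << shouldRename(/*DefFreq=*/16, /*SrcPhysReg=*/3, Uses) << '\n';
}

For these made-up numbers the program prints "true": removing the defining COPY (weight 16) plus the COPY that becomes an identity copy (weight 8) outweighs the identity copy that would be lost (weight 2), mirroring the "if (Cost >= Benefit) continue;" check in the patch.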