Index: lib/CodeGen/MachineCopyPropagation.cpp
===================================================================
--- lib/CodeGen/MachineCopyPropagation.cpp
+++ lib/CodeGen/MachineCopyPropagation.cpp
@@ -45,11 +45,11 @@
 
     bool runOnMachineFunction(MachineFunction &MF) override;
 
-  private:
     typedef SmallVector DestList;
     typedef DenseMap SourceMap;
     typedef DenseMap Reg2MIMap;
 
+  private:
     void SourceNoLongerAvailable(unsigned Reg);
 
     void CopyPropagateBlock(MachineBasicBlock &MBB);
@@ -84,21 +84,6 @@
   }
 }
 
-static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
-                                    const MachineInstr *MI) {
-  const MachineBasicBlock *MBB = CopyMI->getParent();
-  if (MI->getParent() != MBB)
-    return false;
-
-  for (MachineBasicBlock::const_iterator I = std::next(CopyMI->getIterator()),
-       E = MBB->end(), E2 = MI->getIterator(); I != E && I != E2; ++I) {
-    if (I->hasUnmodeledSideEffects() || I->isCall() ||
-        I->isTerminator())
-      return false;
-  }
-  return true;
-}
-
 /// isNopCopy - Return true if the specified copy is really a nop. That is
 /// if the source of the copy is the same of the definition of the copy that
 /// supplied the source. If the source of the copy is a sub-register than it
@@ -124,6 +109,20 @@
   return false;
 }
 
+/// Erase every entry of \p Map whose key register is clobbered by \p RegMask.
+/// \p Map is taken by reference so the erasures are applied to the caller's
+/// map rather than to a temporary copy.
+static void removeClobberedRegsFromMap(MachineCopyPropagation::Reg2MIMap &Map,
+                                       const MachineOperand &RegMask) {
+  for (MachineCopyPropagation::Reg2MIMap::iterator I = Map.begin(),
+       E = Map.end(), Next; I != E; I = Next) {
+    Next = std::next(I);
+    unsigned Reg = I->first;
+    if (RegMask.clobbersPhysReg(Reg))
+      Map.erase(I);
+  }
+}
+
 void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
   DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
 
@@ -142,9 +141,7 @@
       DenseMap::iterator CI = AvailCopyMap.find(Src);
       if (CI != AvailCopyMap.end()) {
         MachineInstr *CopyMI = CI->second;
-        if (!MRI->isReserved(Def) &&
-            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
-            isNopCopy(CopyMI, Def, Src, TRI)) {
+        if (!MRI->isReserved(Def) && isNopCopy(CopyMI, Def, Src, TRI)) {
           // The two copies cancel out and the source of the first copy
           // hasn't been overridden, eliminate the second one. e.g.
           // %ECX = COPY %EAX
@@ -260,9 +257,8 @@
     }
 
     // The instruction has a register mask operand which means that it clobbers
-    // a large set of registers. It is possible to use the register mask to
-    // prune the available copies, but treat it like a basic block boundary for
-    // now.
+    // a large set of registers. Treat clobbered registers the same way as
+    // defined registers.
     if (RegMask) {
       // Erase any MaybeDeadCopies whose destination register is clobbered.
       for (MachineInstr *MaybeDead : MaybeDeadCopies) {
@@ -276,13 +272,23 @@
         Changed = true;
         ++NumDeletes;
       }
-
-      // Clear all data structures as if we were beginning a new basic block.
       MaybeDeadCopies.clear();
-      AvailCopyMap.clear();
-      CopyMap.clear();
-      SrcMap.clear();
-      continue;
+
+      removeClobberedRegsFromMap(AvailCopyMap, *RegMask);
+      removeClobberedRegsFromMap(CopyMap, *RegMask);
+      for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next;
+           I != E; I = Next) {
+        Next = std::next(I);
+        unsigned Reg = I->first;
+        if (RegMask->clobbersPhysReg(Reg)) {
+          const DestList &Defs = I->second;
+          for (unsigned MappedDef : Defs) {
+            // Source of copy is no longer available for propagation.
+            for (MCSubRegIterator SR(MappedDef, TRI, true); SR.isValid(); ++SR)
+              AvailCopyMap.erase(*SR);
+          }
+        }
+      }
     }
 
     for (unsigned Reg : Defs) {
Index: test/CodeGen/X86/machine-copy-prop.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/machine-copy-prop.mir
@@ -0,0 +1,21 @@
+# RUN: llc -march=x86 -run-pass machine-cp -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+  declare preserve_mostcc void @foo()
+  define void @copyprop1() { ret void }
+...
+---
+# CHECK-LABEL: name: copyprop1
+# CHECK: bb.0:
+# CHECK-NEXT: %rax = COPY %rdi
+# CHECK-NEXT: CALL64pcrel32 @foo, csr_64_rt_mostregs, implicit %rsp, implicit-def %rsp
+# CHECK-NOT: %rdi = COPY %rax
+# CHECK-NEXT: NOOP implicit %rax, implicit %rdi
+name: copyprop1
+body: |
+  bb.0:
+    %rax = COPY %rdi
+    CALL64pcrel32 @foo, csr_64_rt_mostregs, implicit %rsp, implicit-def %rsp
+    %rdi = COPY %rax
+    NOOP implicit %rax, implicit %rdi
+...