Index: llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp +++ llvm/trunk/lib/CodeGen/MachineCopyPropagation.cpp @@ -52,6 +52,7 @@ private: void ClobberRegister(unsigned Reg); void CopyPropagateBlock(MachineBasicBlock &MBB); + bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); /// Candidates for deletion. SmallSetVector MaybeDeadCopies; @@ -109,29 +110,61 @@ } } -/// isNopCopy - Return true if the specified copy is really a nop. That is -/// if the source of the copy is the same of the definition of the copy that -/// supplied the source. If the source of the copy is a sub-register than it -/// must check the sub-indices match. e.g. -/// ecx = mov eax -/// al = mov cl -/// But not -/// ecx = mov eax -/// al = mov ch -static bool isNopCopy(const MachineInstr *CopyMI, unsigned Def, unsigned Src, - const TargetRegisterInfo *TRI) { - unsigned SrcSrc = CopyMI->getOperand(1).getReg(); - if (Def == SrcSrc) +/// Return true if \p PreviousCopy did copy register \p Src to register \p Def. +/// This fact may have been obscured by sub register usage or may not be true at +/// all even though Src and Def are subregisters of the registers used in +/// PreviousCopy. e.g. 
+/// isNopCopy("ecx = COPY eax", AX, CX) == true +/// isNopCopy("ecx = COPY eax", AH, CL) == false +static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, + unsigned Def, const TargetRegisterInfo *TRI) { + unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg(); + unsigned PreviousDef = PreviousCopy.getOperand(0).getReg(); + if (Src == PreviousSrc) { + assert(Def == PreviousDef); return true; - if (TRI->isSubRegister(SrcSrc, Def)) { - unsigned SrcDef = CopyMI->getOperand(0).getReg(); - unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); - if (!SubIdx) - return false; - return SubIdx == TRI->getSubRegIndex(SrcDef, Src); } + if (!TRI->isSubRegister(PreviousSrc, Src)) + return false; + unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src); + return SubIdx == TRI->getSubRegIndex(PreviousDef, Def); +} + +/// Remove instruction \p Copy if there exists a previous copy that copies the +/// register \p Src to the register \p Def; This may happen indirectly by +/// copying the super registers. +bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, + unsigned Def) { + // Avoid eliminating a copy from/to a reserved registers as we cannot predict + // the value (Example: The sparc zero register is writable but stays zero). + if (MRI->isReserved(Src) || MRI->isReserved(Def)) + return false; + + // Search for an existing copy. + Reg2MIMap::iterator CI = AvailCopyMap.find(Def); + if (CI == AvailCopyMap.end()) + return false; + + // Check that the existing copy uses the correct sub registers. + MachineInstr &PrevCopy = *CI->second; + if (!isNopCopy(PrevCopy, Src, Def, TRI)) + return false; + + DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); - return false; + // Copy was redundantly redefining either Src or Def. Remove earlier kill + // flags between Copy and PrevCopy because the value will be reused now. 
+ assert(Copy.isCopy()); + unsigned CopyDef = Copy.getOperand(0).getReg(); + assert(CopyDef == Src || CopyDef == Def); + for (MachineInstr &MI : + make_range(PrevCopy.getIterator(), Copy.getIterator())) + MI.clearRegisterKills(CopyDef, TRI); + + Copy.eraseFromParent(); + Changed = true; + ++NumDeletes; + return true; } void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { @@ -149,38 +182,23 @@ !TargetRegisterInfo::isVirtualRegister(Src) && "MachineCopyPropagation should be run after register allocation!"); - DenseMap::iterator CI = AvailCopyMap.find(Src); - if (CI != AvailCopyMap.end()) { - MachineInstr *CopyMI = CI->second; - if (!MRI->isReserved(Def) && isNopCopy(CopyMI, Def, Src, TRI)) { - // The two copies cancel out and the source of the first copy - // hasn't been overridden, eliminate the second one. e.g. - // %ECX = COPY %EAX - // ... nothing clobbered EAX. - // %EAX = COPY %ECX - // => - // %ECX = COPY %EAX - // - // Also avoid eliminating a copy from reserved registers unless the - // definition is proven not clobbered. e.g. - // %RSP = COPY %RAX - // CALL - // %RAX = COPY %RSP - - DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump()); - - // Clear any kills of Def between CopyMI and MI. This extends the - // live range. - for (MachineInstr &MMI - : make_range(CopyMI->getIterator(), MI->getIterator())) - MMI.clearRegisterKills(Def, TRI); - - MI->eraseFromParent(); - Changed = true; - ++NumDeletes; - continue; - } - } + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX = COPY %EAX + // ... nothing clobbered EAX. + // %EAX = COPY %ECX + // => + // %ECX = COPY %EAX + // + // or + // + // %ECX = COPY %EAX + // ... nothing clobbered EAX. 
+ // %ECX = COPY %EAX + // => + // %ECX = COPY %EAX + if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def)) + continue; // If Src is defined by a previous copy, the previous copy cannot be // eliminated. @@ -293,9 +311,8 @@ } // Any previous copy definition or reading the Defs is no longer available. - for (unsigned Reg : Defs) { + for (unsigned Reg : Defs) ClobberRegister(Reg); - } } // If MBB doesn't have successors, delete the copies whose defs are not used. Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll @@ -10,11 +10,10 @@ ; TODO: this constant should be folded: ; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1 ; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff -; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]] -; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]] +; VI: s_mov_b32 s[[LOW:[0-9+]]], s[[ALLBITS]] +; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW]]:[[HIGH1]]] ; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff -; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]] -; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] +; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW]]:[[HIGH2]]] define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind { %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll @@ -28,11 +28,10 @@ ; TODO: this constant should be folded: ; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1 ; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff -; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]] -; VI: v_min_f64 v[0:1], [[RSQ]], 
s{{\[}}[[LOW1]]:[[HIGH1]]] +; VI: s_mov_b32 s[[LOW:[0-9+]]], s[[ALLBITS]] +; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW]]:[[HIGH1]]] ; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff -; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]] -; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] +; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW]]:[[HIGH2]]] define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 { %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) store double %rsq_clamp, double addrspace(1)* %out Index: llvm/trunk/test/CodeGen/X86/machine-copy-prop.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/machine-copy-prop.mir +++ llvm/trunk/test/CodeGen/X86/machine-copy-prop.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=x86 -run-pass machine-cp -o /dev/null %s 2>&1 | FileCheck %s +# RUN: llc -march=x86 -run-pass machine-cp -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --- | declare void @foo() @@ -6,8 +6,14 @@ define void @copyprop_remove_kill1() { ret void } define void @copyprop_remove_kill2() { ret void } define void @copyprop0() { ret void } + define void @copyprop1() { ret void } + define void @copyprop2() { ret void } define void @nocopyprop0() { ret void } define void @nocopyprop1() { ret void } + define void @nocopyprop2() { ret void } + define void @nocopyprop3() { ret void } + define void @nocopyprop4() { ret void } + define void @nocopyprop5() { ret void } ... --- # The second copy is redundant and will be removed, check that we also remove @@ -79,6 +85,38 @@ NOOP implicit %rax, implicit %rdi ... --- +# The 2nd copy is redundant; The call preserves the source and dest register. 
+# CHECK-LABEL: name: copyprop1 +# CHECK: bb.0: +# CHECK-NEXT: %rax = COPY %rdi +# CHECK-NEXT: NOOP implicit %rax +# CHECK-NEXT: NOOP implicit %rax, implicit %rdi +name: copyprop1 +body: | +  bb.0: +    %rax = COPY %rdi +    NOOP implicit killed %rax +    %rax = COPY %rdi +    NOOP implicit %rax, implicit %rdi +... +--- +# CHECK-LABEL: name: copyprop2 +# CHECK: bb.0: +# CHECK-NEXT: %rax = COPY %rdi +# CHECK-NEXT: NOOP implicit %ax +# CHECK-NEXT: CALL64pcrel32 @foo, csr_64_rt_mostregs +# CHECK-NOT: %rax = COPY %rdi +# CHECK-NEXT: NOOP implicit %rax, implicit %rdi +name: copyprop2 +body: | +  bb.0: +    %rax = COPY %rdi +    NOOP implicit killed %ax +    CALL64pcrel32 @foo, csr_64_rt_mostregs +    %rax = COPY %rdi +    NOOP implicit %rax, implicit %rdi +... +--- # The second copy is not redundant if the source register (%rax) is clobbered # even if the dest (%rbp) is not. # CHECK-LABEL: name: nocopyprop0 @@ -112,3 +150,66 @@ %rax = COPY %rbp NOOP implicit %rax, implicit %rbp ... +--- +# The second copy is not redundant if the dest register (%rax) is clobbered +# even if the source (%rbp) is not. +# CHECK-LABEL: name: nocopyprop2 +# CHECK: bb.0: +# CHECK-NEXT: %rax = COPY %rbp +# CHECK-NEXT: CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp +# CHECK-NEXT: %rax = COPY %rbp +# CHECK-NEXT: NOOP implicit %rax, implicit %rbp +name: nocopyprop2 +body: | +  bb.0: +    %rax = COPY %rbp +    CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp +    %rax = COPY %rbp +    NOOP implicit %rax, implicit %rbp +... +--- +# The second copy is not redundant if the source register (%rax) is clobbered +# even if the dest (%rbp) is not. 
+# CHECK-LABEL: name: nocopyprop3 +# CHECK: bb.0: +# CHECK-NEXT: %rbp = COPY %rax +# CHECK-NEXT: CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp +# CHECK-NEXT: %rbp = COPY %rax +# CHECK-NEXT: NOOP implicit %rax, implicit %rbp +name: nocopyprop3 +body: | + bb.0: + %rbp = COPY %rax + CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp + %rbp = COPY %rax + NOOP implicit %rax, implicit %rbp +... +--- +# A reserved register may change its value so the 2nd copy is not redundant. +# CHECK-LABEL: name: nocopyprop4 +# CHECK: bb.0: +# CHECK-NEXT: %rax = COPY %rip +# CHECK-NEXT: NOOP implicit %rax +# CHECK-NEXT: %rax = COPY %rip +# CHECK-NEXT: NOOP implicit %rax +name: nocopyprop4 +body: | + bb.0: + %rax = COPY %rip + NOOP implicit %rax + %rax = COPY %rip + NOOP implicit %rax +... +--- +# Writing to a reserved register may have additional effects (slightly illegal +# testcase because writing to %rip like this should make the instruction a jump) +# CHECK-LABEL: name: nocopyprop5 +# CHECK: bb.0: +# CHECK-NEXT: %rip = COPY %rax +# CHECK-NEXT: %rip = COPY %rax +name: nocopyprop5 +body: | + bb.0: + %rip = COPY %rax + %rip = COPY %rax +...