Skip to content

Commit

Permalink
Revert "[MachineCopyPropagation] Extend pass to do COPY source forwar…
Browse files Browse the repository at this point in the history
…ding"

This reverts commit r323991.

This commit breaks targets that don't model all of their register
constraints in TableGen. So far the workaround has been to set the
hasExtraXXXRegAllocReq flags, but that has proven not to cover all the
cases.
For instance, when an instruction is mutated (as in the lowering of
COPYs), the isRenamable flag is not properly updated. The same problem
occurs when machine operands are moved from one instruction to
another.

Geoff Berry is working on a fix in https://reviews.llvm.org/D43042.

llvm-svn: 325421
Quentin Colombet committed Feb 17, 2018
1 parent a1d6107 commit 48abac8
Showing 118 changed files with 456 additions and 1,021 deletions.
207 changes: 1 addition & 206 deletions llvm/lib/CodeGen/MachineCopyPropagation.cpp
Original file line number Diff line number Diff line change
@@ -9,35 +9,6 @@
//
// This is an extremely simple MachineInstr-level copy propagation pass.
//
// This pass forwards the source of COPYs to the users of their destinations
// when doing so is legal. For example:
//
// %reg1 = COPY %reg0
// ...
// ... = OP %reg1
//
// If
// - %reg0 has not been clobbered by the time of the use of %reg1
// - the register class constraints are satisfied
// - the COPY def is the only value that reaches OP
// then this pass replaces the above with:
//
// %reg1 = COPY %reg0
// ...
// ... = OP %reg0
//
// This pass also removes some redundant COPYs. For example:
//
// %R1 = COPY %R0
// ... // No clobber of %R1
// %R0 = COPY %R1 <<< Removed
//
// or
//
// %R1 = COPY %R0
// ... // No clobber of %R0
// %R1 = COPY %R0 <<< Removed
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
@@ -52,13 +23,11 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
@@ -68,9 +37,6 @@ using namespace llvm;
#define DEBUG_TYPE "machine-cp"

STATISTIC(NumDeletes, "Number of dead copies deleted");
STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");

namespace {

@@ -107,10 +73,6 @@ using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
void ReadRegister(unsigned Reg);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
void forwardUses(MachineInstr &MI);
bool isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI, unsigned UseIdx);
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);

/// Candidates for deletion.
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
@@ -246,152 +208,6 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return true;
}

/// Decide whether we should forward the source of \param Copy to its use in
/// \param UseI based on the physical register class constraints of the opcode
/// and avoiding introducing more cross-class COPYs.
bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI,
unsigned UseIdx) {

unsigned CopySrcReg = Copy.getOperand(1).getReg();

// If the new register meets the opcode register constraints, then allow
// forwarding.
if (const TargetRegisterClass *URC =
UseI.getRegClassConstraint(UseIdx, TII, TRI))
return URC->contains(CopySrcReg);

if (!UseI.isCopy())
return false;

/// COPYs don't have register class constraints, so if the user instruction
/// is a COPY, we just try to avoid introducing additional cross-class
/// COPYs. For example:
///
/// RegClassA = COPY RegClassB // Copy parameter
/// ...
/// RegClassB = COPY RegClassA // UseI parameter
///
/// which after forwarding becomes
///
/// RegClassA = COPY RegClassB
/// ...
/// RegClassB = COPY RegClassB
///
/// so we have reduced the number of cross-class COPYs and potentially
/// introduced a nop COPY that can be removed.
const TargetRegisterClass *UseDstRC =
TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());

const TargetRegisterClass *SuperRC = UseDstRC;
for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
SuperRC; SuperRC = *SuperRCI++)
if (SuperRC->contains(CopySrcReg))
return true;

return false;
}

/// Report whether \p MI has an implicit register use, other than \p Use
/// itself, whose register overlaps the register of \p Use (the operand being
/// replaced). Such implicit operands can sometimes be implicitly tied to
/// other operands. For example, on AMDGPU:
///
/// V_MOVRELS_B32_e32 %VGPR2, %M0<imp-use>, %EXEC<imp-use>, %VGPR2_VGPR3_VGPR4_VGPR5<imp-use>
///
/// Here %VGPR2 is implicitly tied to the wider register operand, but there is
/// no way of knowing that the latter must be updated when the former is.
///
/// \returns true if an overlapping implicit use exists.
bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
                                                const MachineOperand &Use) {
  for (const MachineOperand &Other : MI.uses()) {
    // The operand being replaced trivially overlaps itself; skip it.
    if (&Other == &Use)
      continue;

    // Only implicit register uses are of interest.
    if (!Other.isReg() || !Other.isImplicit() || !Other.isUse())
      continue;

    if (TRI->regsOverlap(Use.getReg(), Other.getReg()))
      return true;
  }

  return false;
}

/// Forward COPY sources into the operands of \p MI.
///
/// For every eligible register use in \p MI whose register is the destination
/// of a copy recorded in AvailCopyMap, rewrite the use to read the copy's
/// source register instead. Each successful rewrite bumps NumCopyForwards and
/// sets Changed.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
  // Nothing can be forwarded when no copy is currently available.
  if (AvailCopyMap.empty())
    return;

  const unsigned NumOps = MI.getNumOperands();
  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
    MachineOperand &UseOp = MI.getOperand(OpIdx);

    // Only non-tied, explicit register uses are candidates.
    if (!UseOp.isReg())
      continue;
    if (UseOp.isTied() || UseOp.isDef() || UseOp.isImplicit())
      continue;

    // Forwarding into an undef use can upset the machine verifier: it does
    // not treat undef reads as reads, so a live range could end up ending on
    // an undef read, which is reported as a live range that does not end on a
    // read of its register.
    if (UseOp.isUndef())
      continue;

    const unsigned UseReg = UseOp.getReg();
    if (!UseReg)
      continue;

    // The operand must be marked 'renamable', which tells us it is safe to
    // rename without violating constraints that are not expressed in the IR
    // (e.g. ABI or opcode requirements).
    if (!UseOp.isRenamable())
      continue;

    // Is there an available copy that defines the register being read?
    auto AvailIt = AvailCopyMap.find(UseReg);
    if (AvailIt == AvailCopyMap.end())
      continue;

    MachineInstr &Copy = *AvailIt->second;
    const MachineOperand &CopySrc = Copy.getOperand(1);
    const unsigned FwdReg = CopySrc.getReg();

    // FIXME: Partial uses of wider COPYs are not handled yet.
    if (UseReg != Copy.getOperand(0).getReg()) {
      DEBUG(dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n "
                   << MI);
      continue;
    }

    // Reserved registers are only forwarded when they are constant.
    if (MRI->isReserved(FwdReg) && !MRI->isConstantPhysReg(FwdReg))
      continue;

    // Respect register class constraints and implicit operand overlaps.
    if (!isForwardableRegClassCopy(Copy, MI, OpIdx) ||
        hasImplicitOverlap(MI, UseOp))
      continue;

    // Consult the debug counter last, once the rewrite is otherwise known to
    // be legal.
    if (!DebugCounter::shouldExecute(FwdCounter)) {
      DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
                   << MI);
      continue;
    }

    DEBUG(dbgs() << "MCP: Replacing " << printReg(UseReg, TRI)
                 << "\n with " << printReg(FwdReg, TRI) << "\n in "
                 << MI << " from " << Copy);

    // Perform the rewrite. The forwarded operand may stay renamable only if
    // the copy's source operand was renamable as well.
    UseOp.setReg(FwdReg);
    if (!CopySrc.isRenamable())
      UseOp.setIsRenamable(false);

    DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");

    // The forwarded register is now read at MI, so kill markers on it
    // anywhere from the COPY up to and including MI may have been
    // invalidated; clear them.
    auto Affected =
        make_range(Copy.getIterator(), std::next(MI.getIterator()));
    for (MachineInstr &Between : Affected)
      Between.clearRegisterKills(FwdReg, TRI);

    ++NumCopyForwards;
    Changed = true;
  }
}

void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");

@@ -425,11 +241,6 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
continue;

forwardUses(*MI);

// Src may have been changed by forwardUses()
Src = MI->getOperand(1).getReg();

// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
ReadRegister(Src);
@@ -481,20 +292,6 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
continue;
}

// Clobber any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
unsigned Reg = MO.getReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
if (MO.isTied())
ReadRegister(Reg);
ClobberRegister(Reg);
}

forwardUses(*MI);

// Not a copy.
SmallVector<unsigned, 2> Defs;
const MachineOperand *RegMask = nullptr;
@@ -510,7 +307,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
"MachineCopyPropagation should be run after register allocation!");

if (MO.isDef() && !MO.isEarlyClobber()) {
if (MO.isDef()) {
Defs.push_back(Reg);
continue;
} else if (MO.readsReg())
@@ -567,8 +364,6 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// since we don't want to trust live-in lists.
if (MBB.succ_empty()) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
MaybeDead->eraseFromParent();
Changed = true;
4 changes: 0 additions & 4 deletions llvm/lib/CodeGen/TargetPassConfig.cpp
Original file line number Diff line number Diff line change
@@ -1080,10 +1080,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// kill markers.
addPass(&StackSlotColoringID);

// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
addPass(&MachineCopyPropagationID);

// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
Original file line number Diff line number Diff line change
@@ -9,8 +9,7 @@ define i16 @halfword(%struct.a* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: halfword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldrh [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #1]
; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
; CHECK: strh [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #1]
; CHECK: strh [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #1]
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -25,8 +24,7 @@ define i32 @word(%struct.b* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: word:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldr [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #2]
; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #2]
; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #2]
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -41,8 +39,7 @@ define i64 @doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: doubleword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldr [[REG1:x[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #3]
; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #3]
; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #3]
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
16 changes: 12 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
Original file line number Diff line number Diff line change
@@ -8,19 +8,27 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; CHECK-NOT: fmov
; Without advanced copy optimization, we end up with cross-register-bank
; copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy
; to insert the computed high part into the V register.
; CHECK-OPT-NOT: fmov
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOT: fmov
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
; CHECK: mov.d v0[1], [[COPY_REG2]]
; CHECK-NEXT: ret
;
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOT: fmov
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOT: fmov
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: mov v0.d[1], [[COPY_REG2]]
; GENERIC-NEXT: ret
%add = add <2 x i64> %a, %b
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll
Original file line number Diff line number Diff line change
@@ -4,10 +4,8 @@
define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind ssp {
entry:
; CHECK-LABEL: t:
; CHECK: mov [[REG2:x[0-9]+]], x3
; CHECK: mov [[REG1:x[0-9]+]], x2
; CHECK: mov x0, x2
; CHECK: mov x1, x3
; CHECK: mov x0, [[REG1:x[0-9]+]]
; CHECK: mov x1, [[REG2:x[0-9]+]]
; CHECK: bl _foo
; CHECK: mov x0, [[REG1]]
; CHECK: mov x1, [[REG2]]
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
Original file line number Diff line number Diff line change
@@ -45,7 +45,8 @@ define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {

; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: eor w0, wzr, #0x1
; CHECK: mov [[TMP:w[0-9]+]], wzr
; CHECK: eor w0, [[TMP]], #0x1
; CHECK: ret

%pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
Loading

0 comments on commit 48abac8

Please sign in to comment.