Skip to content

Commit c8f78f8

Browse files
committedApr 5, 2019
[AMDGPU] Add MachineDCE pass after RenameIndependentSubregs
The DetectDeadLanes pass can create dead defs. RenameIndependentSubregs will then break up a REG_SEQUENCE that may use these dead defs. At that point the dead instruction could be removed, but no DCE runs anymore: MachineDCE was only run before live variable analysis. This patch adds a means to preserve LiveIntervals and SlotIndexes so that MachineDCE can also run past that point. Differential Revision: https://reviews.llvm.org/D59626 llvm-svn: 357805
1 parent d2225d0 commit c8f78f8

8 files changed

+63
-4
lines changed
 

‎llvm/lib/CodeGen/DeadMachineInstructionElim.cpp

+28-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "llvm/ADT/DenseSet.h"
1314
#include "llvm/ADT/Statistic.h"
15+
#include "llvm/CodeGen/LiveIntervals.h"
1416
#include "llvm/CodeGen/MachineFunctionPass.h"
1517
#include "llvm/CodeGen/MachineRegisterInfo.h"
1618
#include "llvm/CodeGen/Passes.h"
@@ -32,6 +34,7 @@ namespace {
3234
const TargetRegisterInfo *TRI;
3335
const MachineRegisterInfo *MRI;
3436
const TargetInstrInfo *TII;
37+
LiveIntervals *LIS;
3538
BitVector LivePhysRegs;
3639

3740
public:
@@ -41,7 +44,7 @@ namespace {
4144
}
4245

4346
void getAnalysisUsage(AnalysisUsage &AU) const override {
44-
AU.setPreservesCFG();
47+
AU.setPreservesAll();
4548
MachineFunctionPass::getAnalysisUsage(AU);
4649
}
4750

@@ -78,9 +81,15 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
7881
unsigned Reg = MO.getReg();
7982
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
8083
// Don't delete live physreg defs, or any reserved register defs.
81-
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
84+
// Do not remove physreg defs if we have LIS as we may be unable
85+
// to accurately recompute its liveness.
86+
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS)
8287
return false;
8388
} else {
89+
// An instruction can also use its own def if it is a tied operand.
90+
// TODO: Technically we can also remove it if def dominates the use.
91+
// This can happen when two instructions define different subregs
92+
// of the same register.
8493
for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
8594
if (&Use != MI)
8695
// This def has a non-debug use. Don't delete the instruction!
@@ -102,6 +111,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
102111
MRI = &MF.getRegInfo();
103112
TRI = MF.getSubtarget().getRegisterInfo();
104113
TII = MF.getSubtarget().getInstrInfo();
114+
LIS = getAnalysisIfAvailable<LiveIntervals>();
115+
DenseSet<unsigned> RecalcRegs;
105116

106117
// Loop over all instructions in all blocks, from bottom to top, so that it's
107118
// more likely that chains of dependent but ultimately dead instructions will
@@ -127,6 +138,14 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
127138
// If the instruction is dead, delete it!
128139
if (isDead(MI)) {
129140
LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
141+
if (LIS) {
142+
for (const MachineOperand &MO : MI->operands()) {
143+
if (MO.isReg() && TRI->isVirtualRegister(MO.getReg()))
144+
RecalcRegs.insert(MO.getReg());
145+
}
146+
LIS->RemoveMachineInstrFromMaps(*MI);
147+
}
148+
130149
// It is possible that some DBG_VALUE instructions refer to this
131150
// instruction. They get marked as undef and will be deleted
132151
// in the live debug variable analysis.
@@ -170,5 +189,12 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
170189
}
171190

172191
LivePhysRegs.clear();
192+
193+
for (auto Reg : RecalcRegs) {
194+
LIS->removeInterval(Reg);
195+
if (!MRI->reg_empty(Reg))
196+
LIS->createAndComputeVirtRegInterval(Reg);
197+
}
198+
173199
return AnyChanges;
174200
}

‎llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ static cl::opt<bool> EnableSIModeRegisterPass(
163163
cl::init(true),
164164
cl::Hidden);
165165

166+
// Option is used in lit tests to prevent deadcoding of patterns inspected.
167+
static cl::opt<bool>
168+
EnableDCEInRA("amdgpu-dce-in-ra",
169+
cl::init(true), cl::Hidden,
170+
cl::desc("Enable machine DCE inside regalloc"));
171+
166172
extern "C" void LLVMInitializeAMDGPUTarget() {
167173
// Register the target
168174
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -901,6 +907,9 @@ void GCNPassConfig::addOptimizedRegAlloc() {
901907
// This must be run just after RegisterCoalescing.
902908
insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
903909

910+
if (EnableDCEInRA)
911+
insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
912+
904913
TargetPassConfig::addOptimizedRegAlloc();
905914
}
906915

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# RUN: llc -march=amdgcn -mcpu=tonga %s -start-before detect-dead-lanes -stop-before machine-scheduler -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
2+
3+
# GCN-LABEL: name: dead_lane
4+
# GCN: bb.0:
5+
# GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $exec
6+
# GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0,
7+
---
8+
name: dead_lane
9+
tracksRegLiveness: true
10+
body: |
11+
bb.0:
12+
%1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
13+
%2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
14+
%3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
15+
FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
16+
S_ENDPGM 0
17+
18+
...

‎llvm/test/CodeGen/AMDGPU/salu-to-valu.ll

+1
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,7 @@ bb2:
475475

476476
bb4:
477477
%tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
478+
store volatile i32 %tmp5, i32 addrspace(1)* undef
478479
br label %bb1
479480
}
480481

‎llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll

+1
Original file line numberDiff line numberDiff line change
@@ -523,5 +523,6 @@ bb2: ; preds = %bb1
523523

524524
bb11: ; preds = %bb10, %bb2
525525
%tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ]
526+
store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
526527
br label %bb1
527528
}

‎llvm/test/CodeGen/AMDGPU/shrink-carry.mir

+4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ body: |
2121
%2 = IMPLICIT_DEF
2222
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
2323
%4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
24+
GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
2425
2526
...
2627

@@ -45,6 +46,7 @@ body: |
4546
%2 = IMPLICIT_DEF
4647
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
4748
%4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
49+
GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
4850
4951
...
5052

@@ -69,6 +71,7 @@ body: |
6971
%2 = IMPLICIT_DEF
7072
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
7173
%4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
74+
GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
7275
7376
...
7477

@@ -93,5 +96,6 @@ body: |
9396
%2 = IMPLICIT_DEF
9497
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
9598
%4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
99+
GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
96100
97101
...

‎llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-dce-in-ra=0 -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
22
# https://bugs.llvm.org/show_bug.cgi?id=33620
33

44
---

‎llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
1+
; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
22
; Don't crash when the use of an undefined value is only detected by the
33
; register coalescer because it is hidden with subregister insert/extract.
44
target triple="amdgcn--"

0 commit comments

Comments
 (0)
Please sign in to comment.