Index: lib/CodeGen/DeadMachineInstructionElim.cpp =================================================================== --- lib/CodeGen/DeadMachineInstructionElim.cpp +++ lib/CodeGen/DeadMachineInstructionElim.cpp @@ -10,7 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -32,6 +34,7 @@ const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; + LiveIntervals *LIS; BitVector LivePhysRegs; public: @@ -41,7 +44,7 @@ } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); + AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -78,9 +81,15 @@ unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. - if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) + // Do not remove physreg defs if we have LIS, as we may be unable + // to accurately recompute their liveness. + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS) return false; } else { + // An instruction can also use its own def if it is a tied operand. + // TODO: Technically we can also remove it if def dominates the use. + // This can happen when two instructions define different subregs + // of the same register. for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { if (&Use != MI) // This def has a non-debug use. Don't delete the instruction! 
@@ -102,6 +111,8 @@ MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); + DenseSet<unsigned> RecalcRegs; // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -127,6 +138,14 @@ // If the instruction is dead, delete it! if (isDead(MI)) { LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + if (LIS) { + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && TRI->isVirtualRegister(MO.getReg())) + RecalcRegs.insert(MO.getReg()); + } + LIS->RemoveMachineInstrFromMaps(*MI); + } + // It is possible that some DBG_VALUE instructions refer to this // instruction. They get marked as undef and will be deleted // in the live debug variable analysis. @@ -170,5 +189,12 @@ } LivePhysRegs.clear(); + + for (auto Reg : RecalcRegs) { + LIS->removeInterval(Reg); + if (!MRI->reg_empty(Reg)) + LIS->createAndComputeVirtRegInterval(Reg); + } + return AnyChanges; } Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -163,6 +163,12 @@ cl::init(true), cl::Hidden); +// Option is used in lit tests to prevent deadcoding of patterns inspected. +static cl::opt<bool> +EnableDCEInRA("amdgpu-dce-in-ra", + cl::init(true), cl::Hidden, + cl::desc("Enable machine DCE inside regalloc")); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget()); @@ -901,6 +907,9 @@ // This must be run just after RegisterCoalescing. 
insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false); + if (EnableDCEInRA) + insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID); + TargetPassConfig::addOptimizedRegAlloc(); } Index: test/CodeGen/AMDGPU/dead-lane.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/dead-lane.mir @@ -0,0 +1,18 @@ +# RUN: llc -march=amdgcn -mcpu=tonga %s -start-before detect-dead-lanes -stop-before machine-scheduler -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: dead_lane +# GCN: bb.0: +# GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $exec +# GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, +--- +name: dead_lane +tracksRegLiveness: true +body: | + bb.0: + %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec + %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec + %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1 + FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr + S_ENDPGM 0 + +... 
Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -475,6 +475,7 @@ bb4: %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ] + store volatile i32 %tmp5, i32 addrspace(1)* undef br label %bb1 } Index: test/CodeGen/AMDGPU/sdwa-peephole.ll =================================================================== --- test/CodeGen/AMDGPU/sdwa-peephole.ll +++ test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -523,5 +523,6 @@ bb11: ; preds = %bb10, %bb2 %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ] + store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef br label %bb1 } Index: test/CodeGen/AMDGPU/shrink-carry.mir =================================================================== --- test/CodeGen/AMDGPU/shrink-carry.mir +++ test/CodeGen/AMDGPU/shrink-carry.mir @@ -21,6 +21,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... @@ -45,6 +46,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... @@ -69,6 +71,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... @@ -93,5 +96,6 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... 
Index: test/CodeGen/AMDGPU/spill-empty-live-interval.mir =================================================================== --- test/CodeGen/AMDGPU/spill-empty-live-interval.mir +++ test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-dce-in-ra=0 -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s # https://bugs.llvm.org/show_bug.cgi?id=33620 --- Index: test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s ; Don't crash when the use of an undefined value is only detected by the ; register coalescer because it is hidden with subregister insert/extract. target triple="amdgcn--"