Index: lib/CodeGen/DeadMachineInstructionElim.cpp =================================================================== --- lib/CodeGen/DeadMachineInstructionElim.cpp +++ lib/CodeGen/DeadMachineInstructionElim.cpp @@ -10,7 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -32,6 +34,7 @@ const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; + LiveIntervals *LIS; BitVector LivePhysRegs; public: @@ -41,7 +44,7 @@ } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); + AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -78,9 +81,15 @@ unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. - if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) + // Do not remove physreg defs if we have LIS as we may be unable + // to accurately recompute their liveness. + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS) return false; } else { + // An instruction can also use its def if it is a tied operand. + // TODO: Technically we can also remove it if def dominates the use. + // This can happen when two instructions define different subregs + // of the same register. for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { if (&Use != MI) // This def has a non-debug use. Don't delete the instruction! @@ -102,6 +111,13 @@ MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); + LIS = getAnalysisIfAvailable(); + DenseSet RecalcRegs; + + // The only reason to run MachineDCE after liveness info is ready is to + // remove instructions defining dead subregisters. 
+ if (LIS && !MRI->subRegLivenessEnabled()) + return false; // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -127,6 +143,14 @@ // If the instruction is dead, delete it! if (isDead(MI)) { LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + if (LIS) { + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && TRI->isVirtualRegister(MO.getReg())) + RecalcRegs.insert(MO.getReg()); + } + LIS->RemoveMachineInstrFromMaps(*MI); + } + // It is possible that some DBG_VALUE instructions refer to this // instruction. They get marked as undef and will be deleted // in the live debug variable analysis. @@ -170,5 +194,12 @@ } LivePhysRegs.clear(); + + for (auto Reg : RecalcRegs) { + LIS->removeInterval(Reg); + if (!MRI->reg_empty(Reg)) + LIS->createAndComputeVirtRegInterval(Reg); + } + return AnyChanges; } Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -158,6 +158,12 @@ static cl::opt EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); +// Used in lit tests to prevent DCE from removing the patterns being inspected. +static cl::opt +DisableDCEInRA("disable-dce-in-ra", + cl::init(false), cl::Hidden, + cl::desc("Disable machine DCE inside regalloc")); + // Experimental option to use CFL-AA in codegen enum class CFLAAType { None, Steensgaard, Andersen, Both }; static cl::opt UseCFLAA( @@ -1163,6 +1169,9 @@ // separate vregs before. Splitting can also improve reg. allocation quality. addPass(&RenameIndependentSubregsID); + if (!DisableDCEInRA) + addPass(&DeadMachineInstructionElimID); + // PreRA instruction scheduling. 
addPass(&MachineSchedulerID); Index: test/CodeGen/AArch64/O3-pipeline.ll =================================================================== --- test/CodeGen/AArch64/O3-pipeline.ll +++ test/CodeGen/AArch64/O3-pipeline.ll @@ -114,6 +114,7 @@ ; CHECK-NEXT: Live Interval Analysis ; CHECK-NEXT: Simple Register Coalescing ; CHECK-NEXT: Rename Disconnected Subregister Components +; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: Machine Instruction Scheduler ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Debug Variable Analysis Index: test/CodeGen/AMDGPU/dead-lane.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/dead-lane.mir @@ -0,0 +1,24 @@ +# RUN: llc -march=amdgcn -mcpu=tonga %s -start-before detect-dead-lanes -stop-before machine-scheduler -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: dead_lane +# GCN: bb.0: +# GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %3.sub0, implicit $exec +# GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, +--- +name: dead_lane +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32} + - { id: 1, class: vgpr_32} + - { id: 2, class: vgpr_32} + - { id: 3, class: vreg_64} + - { id: 4, class: vreg_64} +body: | + bb.0: + %1:vgpr_32 = V_MAC_F32_e32 undef %0, undef %0, undef %0, implicit $exec + %2:vgpr_32 = V_MAC_F32_e32 undef %0, undef %0, undef %0, implicit $exec + %3:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1 + FLAT_STORE_DWORD undef %4, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr + S_ENDPGM 0 + +... 
Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -475,6 +475,7 @@ bb4: %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ] + store volatile i32 %tmp5, i32 addrspace(1)* undef br label %bb1 } Index: test/CodeGen/AMDGPU/sdwa-peephole.ll =================================================================== --- test/CodeGen/AMDGPU/sdwa-peephole.ll +++ test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -523,5 +523,6 @@ bb11: ; preds = %bb10, %bb2 %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ] + store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef br label %bb1 } Index: test/CodeGen/AMDGPU/shrink-carry.mir =================================================================== --- test/CodeGen/AMDGPU/shrink-carry.mir +++ test/CodeGen/AMDGPU/shrink-carry.mir @@ -21,6 +21,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... @@ -45,6 +46,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... @@ -69,6 +71,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... @@ -93,5 +96,6 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec ... 
Index: test/CodeGen/AMDGPU/spill-empty-live-interval.mir =================================================================== --- test/CodeGen/AMDGPU/spill-empty-live-interval.mir +++ test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -disable-dce-in-ra -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s # https://bugs.llvm.org/show_bug.cgi?id=33620 --- Index: test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -disable-dce-in-ra -o - %s | FileCheck %s ; Don't crash when the use of an undefined value is only detected by the ; register coalescer because it is hidden with subregister insert/extract. target triple="amdgcn--" Index: test/CodeGen/Hexagon/v6-unaligned-spill.ll =================================================================== --- test/CodeGen/Hexagon/v6-unaligned-spill.ll +++ test/CodeGen/Hexagon/v6-unaligned-spill.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -disable-dce-in-ra < %s | FileCheck %s ; Test that we generate an unaligned vector store for a spill when a function ; has an alloca. 
Also, make sure the addressing mode for unaligned store does Index: test/CodeGen/X86/O3-pipeline.ll =================================================================== --- test/CodeGen/X86/O3-pipeline.ll +++ test/CodeGen/X86/O3-pipeline.ll @@ -111,6 +111,7 @@ ; CHECK-NEXT: Live Interval Analysis ; CHECK-NEXT: Simple Register Coalescing ; CHECK-NEXT: Rename Disconnected Subregister Components +; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: Machine Instruction Scheduler ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Debug Variable Analysis Index: test/CodeGen/X86/llc-start-stop-instance.ll =================================================================== --- test/CodeGen/X86/llc-start-stop-instance.ll +++ test/CodeGen/X86/llc-start-stop-instance.ll @@ -1,18 +1,18 @@ -; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -stop-after=dead-mi-elimination,1 %s -o /dev/null 2>&1 \ +; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -stop-after=dead-mi-elimination,1 -disable-dce-in-ra %s -o /dev/null 2>&1 \ ; RUN: | FileCheck -check-prefix=STOP-AFTER-DEAD1 %s -; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -stop-after=dead-mi-elimination,0 %s -o /dev/null 2>&1 \ +; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -stop-after=dead-mi-elimination,0 -disable-dce-in-ra %s -o /dev/null 2>&1 \ ; RUN: | FileCheck -check-prefix=STOP-AFTER-DEAD0 %s -; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -stop-before=dead-mi-elimination,1 %s -o /dev/null 2>&1 \ +; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -stop-before=dead-mi-elimination,1 -disable-dce-in-ra %s -o /dev/null 2>&1 \ ; RUN: | FileCheck -check-prefix=STOP-BEFORE-DEAD1 %s -; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -start-before=dead-mi-elimination,1 %s -o /dev/null 2>&1 \ +; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -start-before=dead-mi-elimination,1 -disable-dce-in-ra %s -o /dev/null 2>&1 \ ; RUN: | FileCheck -check-prefix=START-BEFORE-DEAD1 %s -; RUN: llc 
-mtriple=x86_64-- -debug-pass=Structure -start-after=dead-mi-elimination,1 %s -o /dev/null 2>&1 \ +; RUN: llc -mtriple=x86_64-- -debug-pass=Structure -start-after=dead-mi-elimination,1 -disable-dce-in-ra %s -o /dev/null 2>&1 \ ; RUN: | FileCheck -check-prefix=START-AFTER-DEAD1 %s