Index: lib/CodeGen/DeadMachineInstructionElim.cpp =================================================================== --- lib/CodeGen/DeadMachineInstructionElim.cpp +++ lib/CodeGen/DeadMachineInstructionElim.cpp @@ -10,9 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -34,7 +32,6 @@ const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - LiveIntervals *LIS; BitVector LivePhysRegs; public: @@ -44,7 +41,7 @@ } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); + AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -81,15 +78,9 @@ unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. - // Do not remove physreg defs if we have LIS as we may be unable - // to accurately recompute its liveness. - if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS) + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { - // An instruction can also use its def in case if it is a tied operand. - // TODO: Technically we can also remove it if def dominates the use. - // This can happen when two instructions define different subregs - // of the same register. for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { if (&Use != MI) // This def has a non-debug use. Don't delete the instruction! 
@@ -111,8 +102,6 @@ MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); - LIS = getAnalysisIfAvailable<LiveIntervals>(); - DenseSet<unsigned> RecalcRegs; // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -138,14 +127,6 @@ // If the instruction is dead, delete it! if (isDead(MI)) { LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); - if (LIS) { - for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && TRI->isVirtualRegister(MO.getReg())) - RecalcRegs.insert(MO.getReg()); - } - LIS->RemoveMachineInstrFromMaps(*MI); - } - // It is possible that some DBG_VALUE instructions refer to this // instruction. They get marked as undef and will be deleted // in the live debug variable analysis. @@ -189,12 +170,5 @@ } LivePhysRegs.clear(); - - for (auto Reg : RecalcRegs) { - LIS->removeInterval(Reg); - if (!MRI->reg_empty(Reg)) - LIS->createAndComputeVirtRegInterval(Reg); - } - return AnyChanges; } Index: test/CodeGen/AMDGPU/bswap.ll =================================================================== --- test/CodeGen/AMDGPU/bswap.ll +++ test/CodeGen/AMDGPU/bswap.ll @@ -328,47 +328,47 @@ define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind { ; SI-LABEL: test_bswap_v2i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b32 s31, 0xff0000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0 -; SI-NEXT: s_mov_b32 s7, 0 +; SI-NEXT: s_load_dwordx4 s[4:7], s[10:11], 0x0 +; SI-NEXT: s_mov_b32 s11, 0 ; SI-NEXT: s_mov_b32 s22, 0xff000000 ; SI-NEXT: s_mov_b32 s27, 0xff00 ; SI-NEXT: s_movk_i32 s25, 0xff -; SI-NEXT: s_mov_b32 s13, s7 -; SI-NEXT: s_mov_b32 s14, s7 -; SI-NEXT: s_mov_b32 s16, s7 -; 
SI-NEXT: s_mov_b32 s18, s7 -; SI-NEXT: s_mov_b32 s20, s7 -; SI-NEXT: s_mov_b32 s23, s7 -; SI-NEXT: s_mov_b32 s24, s7 -; SI-NEXT: s_mov_b32 s26, s7 -; SI-NEXT: s_mov_b32 s28, s7 -; SI-NEXT: s_mov_b32 s30, s7 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s13, s11 +; SI-NEXT: s_mov_b32 s14, s11 +; SI-NEXT: s_mov_b32 s16, s11 +; SI-NEXT: s_mov_b32 s18, s11 +; SI-NEXT: s_mov_b32 s20, s11 +; SI-NEXT: s_mov_b32 s23, s11 +; SI-NEXT: s_mov_b32 s24, s11 +; SI-NEXT: s_mov_b32 s26, s11 +; SI-NEXT: s_mov_b32 s28, s11 +; SI-NEXT: s_mov_b32 s30, s11 +; SI-NEXT: s_mov_b32 s0, s8 +; SI-NEXT: s_mov_b32 s1, s9 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v0, s10 -; SI-NEXT: v_alignbit_b32 v1, s11, v0, 24 -; SI-NEXT: v_alignbit_b32 v0, s11, v0, 8 -; SI-NEXT: s_lshr_b32 s6, s11, 24 -; SI-NEXT: s_lshr_b32 s12, s11, 8 -; SI-NEXT: s_lshl_b64 s[4:5], s[10:11], 8 -; SI-NEXT: s_lshl_b64 s[32:33], s[10:11], 24 -; SI-NEXT: s_lshl_b32 s19, s10, 24 -; SI-NEXT: s_lshl_b32 s21, s10, 8 -; SI-NEXT: v_mov_b32_e32 v2, s8 -; SI-NEXT: v_alignbit_b32 v3, s9, v2, 24 -; SI-NEXT: v_alignbit_b32 v2, s9, v2, 8 -; SI-NEXT: s_lshr_b32 s32, s9, 8 -; SI-NEXT: s_lshl_b64 s[10:11], s[8:9], 8 -; SI-NEXT: s_and_b32 s15, s5, s25 -; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 24 -; SI-NEXT: s_lshl_b32 s29, s8, 24 -; SI-NEXT: s_lshl_b32 s4, s8, 8 +; SI-NEXT: v_mov_b32_e32 v0, s6 +; SI-NEXT: v_alignbit_b32 v1, s7, v0, 24 +; SI-NEXT: v_alignbit_b32 v0, s7, v0, 8 +; SI-NEXT: s_lshr_b32 s10, s7, 24 +; SI-NEXT: s_lshr_b32 s12, s7, 8 +; SI-NEXT: s_lshl_b64 s[8:9], s[6:7], 8 +; SI-NEXT: s_lshl_b64 s[32:33], s[6:7], 24 +; SI-NEXT: s_lshl_b32 s19, s6, 24 +; SI-NEXT: s_lshl_b32 s21, s6, 8 +; SI-NEXT: v_mov_b32_e32 v2, s4 +; SI-NEXT: v_alignbit_b32 v3, s5, v2, 24 +; SI-NEXT: v_alignbit_b32 v2, s5, v2, 8 +; SI-NEXT: s_lshr_b32 s32, s5, 8 +; SI-NEXT: s_lshl_b64 s[6:7], s[4:5], 8 +; SI-NEXT: s_and_b32 s15, s9, s25 +; SI-NEXT: s_lshl_b64 s[8:9], s[4:5], 24 +; SI-NEXT: s_lshl_b32 s29, s4, 24 +; 
SI-NEXT: s_lshl_b32 s4, s4, 8 ; SI-NEXT: v_and_b32_e32 v1, s31, v1 ; SI-NEXT: v_and_b32_e32 v0, s22, v0 ; SI-NEXT: s_and_b32 s12, s12, s27 @@ -377,28 +377,28 @@ ; SI-NEXT: v_and_b32_e32 v3, s31, v3 ; SI-NEXT: v_and_b32_e32 v2, s22, v2 ; SI-NEXT: s_and_b32 s22, s32, s27 -; SI-NEXT: s_and_b32 s25, s11, s25 -; SI-NEXT: s_and_b32 s27, s5, s27 +; SI-NEXT: s_and_b32 s25, s7, s25 +; SI-NEXT: s_and_b32 s27, s9, s27 ; SI-NEXT: s_and_b32 s31, s4, s31 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 -; SI-NEXT: s_or_b64 s[4:5], s[12:13], s[6:7] -; SI-NEXT: s_or_b64 s[10:11], s[16:17], s[14:15] +; SI-NEXT: s_or_b64 s[6:7], s[12:13], s[10:11] +; SI-NEXT: s_or_b64 s[8:9], s[16:17], s[14:15] ; SI-NEXT: s_or_b64 s[12:13], s[18:19], s[20:21] ; SI-NEXT: v_or_b32_e32 v1, v2, v3 -; SI-NEXT: s_lshr_b32 s6, s9, 24 -; SI-NEXT: s_or_b64 s[8:9], s[26:27], s[24:25] +; SI-NEXT: s_lshr_b32 s10, s5, 24 +; SI-NEXT: s_or_b64 s[4:5], s[26:27], s[24:25] ; SI-NEXT: s_or_b64 s[14:15], s[28:29], s[30:31] +; SI-NEXT: v_or_b32_e32 v0, s6, v0 +; SI-NEXT: v_mov_b32_e32 v3, s7 +; SI-NEXT: s_or_b64 s[6:7], s[12:13], s[8:9] +; SI-NEXT: s_or_b64 s[8:9], s[22:23], s[10:11] +; SI-NEXT: s_or_b64 s[4:5], s[14:15], s[4:5] +; SI-NEXT: v_or_b32_e32 v2, s6, v0 +; SI-NEXT: v_or_b32_e32 v3, s7, v3 +; SI-NEXT: v_or_b32_e32 v0, s8, v1 +; SI-NEXT: v_mov_b32_e32 v1, s9 ; SI-NEXT: v_or_b32_e32 v0, s4, v0 -; SI-NEXT: v_mov_b32_e32 v3, s5 -; SI-NEXT: s_or_b64 s[4:5], s[12:13], s[10:11] -; SI-NEXT: s_or_b64 s[6:7], s[22:23], s[6:7] -; SI-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] -; SI-NEXT: v_or_b32_e32 v2, s4, v0 -; SI-NEXT: v_or_b32_e32 v3, s5, v3 -; SI-NEXT: v_or_b32_e32 v0, s6, v1 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: v_or_b32_e32 v0, s8, v0 -; SI-NEXT: v_or_b32_e32 v1, s9, v1 +; SI-NEXT: v_or_b32_e32 v1, s5, v1 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; Index: test/CodeGen/AMDGPU/dce-disjoint-intervals.mir =================================================================== --- /dev/null +++ 
test/CodeGen/AMDGPU/dce-disjoint-intervals.mir @@ -0,0 +1,38 @@ +# RUN: llc -mtriple=amdgcn-- -run-pass=liveintervals,dead-mi-elimination,simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +# This used to fail verification if MachineDCE tracks LIS. + +# GCN-LABEL: name: foo +# GCN: S_ENDPGM +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0 + S_NOP 0, implicit-def %4:sreg_128, implicit %10.sub1:sreg_128 + S_CBRANCH_SCC0 %bb.3, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_SCC0 %bb.2, implicit undef $scc + S_BRANCH %bb.3 + + bb.2: + %8:sreg_32_xm0 = COPY %4.sub1:sreg_128 + %7:sreg_32_xm0 = COPY %10.sub1:sreg_128 + S_BRANCH %bb.4 + + bb.3: + %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0 + %7:sreg_32_xm0 = COPY %10.sub1:sreg_128 + %8:sreg_32_xm0 = COPY %10.sub2:sreg_128 + + bb.4: + S_NOP 0, implicit %10 + $sgpr0 = COPY %8:sreg_32_xm0 + $sgpr1 = COPY %7:sreg_32_xm0 + S_ENDPGM 0, implicit $sgpr0, implicit $sgpr1 +... Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll @@ -15,14 +15,13 @@ } ; FUNC-LABEL: {{^}}ds_ordered_swap_conditional: -; GCN: v_mov_b32_e32 v1, v0 -; GCN: v_cmp_ne_u32_e32 vcc, 0, v1 +; GCN: v_cmp_ne_u32_e32 vcc, 0, v0 ; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc ; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0 ; GCN: s_cbranch_execz [[BB:BB._.]] ; GCN: s_mov_b32 m0, s0 ; VIGFX9-NEXT: s_nop 0 -; GCN-NEXT: ds_ordered_count v0, v1 offset:4868 gds +; GCN-NEXT: ds_ordered_count v1, v0 offset:4868 gds ; GCN-NEXT: [[BB]]: ; // Wait for expcnt(0) before modifying EXEC ; GCN-NEXT: s_waitcnt expcnt(0)