Index: lib/Target/AMDGPU/SIFixWWMLiveness.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ lib/Target/AMDGPU/SIFixWWMLiveness.cpp
@@ -36,7 +36,8 @@
 /// channels which are eventually used accidentally clobbered by a WWM
 /// instruction. We approximate this using two conditions:
 ///
-/// 1. A definition of the variable reaches the WWM instruction.
+/// 1. A definition of the variable reaches the WWM instruction (and dominates
+/// it).
 /// 2. The variable would be live at the WWM instruction if all its defs were
 /// partial defs (i.e. considered as a use), ignoring normal uses.
 ///
@@ -58,6 +59,7 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -70,6 +72,7 @@
 
 class SIFixWWMLiveness : public MachineFunctionPass {
 private:
+  MachineDominatorTree *DomTree = nullptr;
   LiveIntervals *LIS = nullptr;
   const SIRegisterInfo *TRI;
   MachineRegisterInfo *MRI;
@@ -90,6 +93,7 @@
   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequiredID(MachineDominatorsID);
     // Should preserve the same set that TwoAddressInstructions does.
     AU.addPreserved<SlotIndexes>();
     AU.addPreserved<LiveIntervals>();
@@ -103,7 +107,10 @@
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
+                "SI fix WWM liveness", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
                 "SI fix WWM liveness", false, false)
 
 char SIFixWWMLiveness::ID = 0;
@@ -145,7 +152,8 @@
     }
   }
 
-  // Compute the registers that reach MI.
+  // Compute the registers that reach MI, and have some definition that
+  // dominates it.
   SparseBitVector<> Reachable;
 
   for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
@@ -153,10 +161,10 @@
     addDefs(*II, Reachable);
   }
 
-  for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
-                                         E = idf_end(MBB);
-       I != E; ++I) {
-    for (const MachineInstr &MI : **I) {
+  for (auto Node = DomTree->getNode(MBB)->getIDom();
+       Node; Node = Node->getIDom()) {
+    MachineBasicBlock *DominatingMBB = Node->getBlock();
+    for (const MachineInstr &MI : *DominatingMBB) {
       addDefs(MI, Reachable);
     }
   }
@@ -172,6 +180,18 @@
       LIS->removeInterval(Reg);
       LIS->createAndComputeVirtRegInterval(Reg);
     }
+    // Also clear kill flags on uses and dead flags on defs of Reg.
+    for (auto &RI : MRI->reg_instructions(Reg)) {
+      for (unsigned i = 0, e = RI.getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = RI.getOperand(i);
+        if (MO.isReg() && MO.getReg() == Reg) {
+          if (MO.isKill())
+            MO.setIsKill(false);
+          else if (MO.isDead())
+            MO.setIsDead(false);
+        }
+      }
+    }
     Modified = true;
   }
 
@@ -181,6 +201,7 @@
 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
   bool Modified = false;
 
+  DomTree = &getAnalysis<MachineDominatorTree>();
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();
 
Index: test/CodeGen/AMDGPU/wwm-implicit-operands.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/wwm-implicit-operands.mir
@@ -0,0 +1,404 @@
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx802 -verify-machineinstrs -run-pass si-fix-wwm-liveness %s -o - >/dev/null
+
+# This tests that the MIR is still valid after SIFixWWMLiveness. There were a
+# couple of problems where the pass produced invalid MIR because of the way it
+# adds implicit operands to the EXIT_WWM pseudo to (conservatively) model the
+# way that a register which is dead in the current lane may still be live in
+# other lanes, and thus be clobbered by the WWM instructions.
+
+---
+name: _amdgpu_vs_main
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 1, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 2, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 3, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 4, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 5, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 6, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 7, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 8, class: vgpr_32, preferred-register: '' }
+  - { id: 9, class: vgpr_32, preferred-register: '' }
+  - { id: 10, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 11, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 12, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 13, class: vgpr_32, preferred-register: '' }
+  - { id: 14, class: sreg_32_xm0_xexec, preferred-register: '' }
+  - { id: 15, class: sreg_128, preferred-register: '' }
+  - { id: 16, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 17, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 18, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 19, class: sreg_64_xexec, preferred-register: '$vcc' }
+  - { id: 20, class: vgpr_32, preferred-register: '' }
+  - { id: 21, class: vgpr_32, preferred-register: '' }
+  - { id: 22, class: vgpr_32, preferred-register: '' }
+  - { id: 23, class: sreg_64, preferred-register: '$vcc' }
+  - { id: 24, class: vgpr_32, preferred-register: '' }
+  - { id: 25, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 26, class: sreg_32, preferred-register: '' }
+  - { id: 27, class: sreg_64, preferred-register: '' }
+  - { id: 28, class: sreg_64, preferred-register: '' }
+  - { id: 29, class: sreg_64, preferred-register: '' }
+  - { id: 30, class: sreg_64, preferred-register: '' }
+  - { id: 31, class: sreg_64, preferred-register: '' }
+  - { id: 32, class: sreg_64, preferred-register: '' }
+  - { id: 33, class: sreg_64, preferred-register: '' }
+  - { id: 34, class: sreg_64, preferred-register: '' }
+  - { id: 35, class: sreg_64, preferred-register: '' }
+  - { id: 36, class: sreg_64, preferred-register: '' }
+  - { id: 37, class: sreg_64, preferred-register: '' }
+  - { id: 38, class: sreg_64, preferred-register: '' }
+  - { id: 39, class: sreg_64, preferred-register: '' }
+  - { id: 40, class: sreg_64, preferred-register: '' }
+  - { id: 41, class: sreg_64, preferred-register: '' }
+  - { id: 42, class: sreg_64, preferred-register: '' }
+  - { id: 43, class: vgpr_32, preferred-register: '' }
+  - { id: 44, class: vgpr_32, preferred-register: '' }
+  - { id: 45, class: vgpr_32, preferred-register: '' }
+  - { id: 46, class: vgpr_32, preferred-register: '' }
+  - { id: 47, class: sreg_64, preferred-register: '' }
+  - { id: 48, class: vgpr_32, preferred-register: '' }
+liveins:
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 0
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+  bb.0:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+
+  bb.2:
+    successors: %bb.4(0x40000000), %bb.3(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.4, implicit undef $scc
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.4(0x80000000)
+
+
+  bb.4:
+    successors: %bb.5(0x40000000), %bb.7(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.7, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    successors: %bb.7(0x40000000), %bb.6(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.7, implicit undef $scc
+    S_BRANCH %bb.6
+
+  bb.6:
+    successors: %bb.7(0x80000000)
+
+
+  bb.7:
+    successors: %bb.8(0x40000000), %bb.23(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.23, implicit undef $scc
+    S_BRANCH %bb.8
+
+  bb.8:
+    successors: %bb.9(0x40000000), %bb.13(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.13, implicit undef $scc
+    S_BRANCH %bb.9
+
+  bb.9:
+    successors: %bb.12(0x40000000), %bb.10(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.12, implicit undef $scc
+    S_BRANCH %bb.10
+
+  bb.10:
+    successors: %bb.11(0x40000000), %bb.12(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.12, implicit undef $scc
+    S_BRANCH %bb.11
+
+  bb.11:
+    successors: %bb.13(0x40000000), %bb.12(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.12, implicit undef $scc
+    S_BRANCH %bb.13
+
+  bb.12:
+    successors: %bb.13(0x80000000)
+
+
+  bb.13:
+    successors: %bb.14(0x40000000), %bb.52(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.52, implicit undef $scc
+    S_BRANCH %bb.14
+
+  bb.14:
+    successors: %bb.15(0x40000000), %bb.52(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.52, implicit undef $scc
+
+  bb.15:
+    successors: %bb.52(0x40000000), %bb.16(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.52, implicit undef $scc
+
+  bb.16:
+    successors: %bb.17(0x30000000), %bb.18(0x50000000)
+
+    %36:sreg_64 = S_AND_B64 $exec, 0, implicit-def dead $scc
+    $vcc = COPY killed %36
+    S_CBRANCH_VCCNZ %bb.18, implicit killed $vcc
+    S_BRANCH %bb.17
+
+  bb.17:
+    successors: %bb.18(0x80000000)
+
+
+  bb.18:
+    successors: %bb.19(0x40000000), %bb.52(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.19, implicit undef $scc
+    S_BRANCH %bb.52
+
+  bb.19:
+    successors: %bb.20(0x80000000)
+
+
+  bb.20:
+    successors: %bb.22(0x30000000), %bb.21(0x50000000)
+
+    %38:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
+    $vcc = COPY killed %38
+    S_CBRANCH_VCCNZ %bb.22, implicit killed $vcc
+    S_BRANCH %bb.21
+
+  bb.21:
+    successors: %bb.52(0x80000000)
+
+    S_BRANCH %bb.52
+
+  bb.22:
+    successors: %bb.52(0x80000000)
+
+    S_BRANCH %bb.52
+
+  bb.23:
+    successors: %bb.24(0x40000000), %bb.29(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.29, implicit undef $scc
+    S_BRANCH %bb.24
+
+  bb.24:
+    successors: %bb.25(0x80000000)
+
+    early-clobber %6:sreg_32_xm0 = COPY undef %7:sreg_32_xm0, implicit $exec
+    %48:vgpr_32 = IMPLICIT_DEF
+
+  bb.25:
+    successors: %bb.28(0x40000000), %bb.26(0x40000000)
+
+    %47:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %45:vgpr_32 = COPY killed %48
+    %9:vgpr_32 = COPY %6
+    %8:vgpr_32 = DS_SWIZZLE_B32 killed %9, 4127, 0, implicit $exec
+    early-clobber %43:vgpr_32 = COPY killed %8, implicit $exec, implicit $exec
+    $exec = EXIT_WWM killed %47
+    early-clobber %44:vgpr_32 = COPY killed %43, implicit $exec, implicit $exec
+    %2:sreg_32_xm0 = V_READLANE_B32 killed %44, 63
+    S_CBRANCH_SCC1 %bb.28, implicit undef $scc
+    S_BRANCH %bb.26
+
+  bb.26:
+    successors: %bb.27(0x40000000), %bb.28(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.28, implicit undef $scc
+    S_BRANCH %bb.27
+
+  bb.27:
+    successors: %bb.29(0x04000000), %bb.28(0x7c000000)
+
+    S_CBRANCH_SCC1 %bb.28, implicit undef $scc
+    S_BRANCH %bb.29
+
+  bb.28:
+    successors: %bb.25(0x7c000000), %bb.29(0x04000000)
+
+    %14:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %15:sreg_128, undef %16:sreg_32_xm0, 0 :: (dereferenceable invariant load 4)
+    %18:sreg_32_xm0 = S_LSHR_B32 killed %14, 24, implicit-def dead $scc
+    %20:vgpr_32 = COPY killed %18
+    %19:sreg_64_xexec = V_CMP_GE_U32_e64 killed %2, killed %20, implicit $exec
+    %22:vgpr_32, dead %23:sreg_64 = V_ADDC_U32_e64 0, killed %45, killed %19, implicit $exec
+    V_CMP_NE_U32_e32 -1, %22, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
+    %48:vgpr_32 = COPY killed %22
+    S_CBRANCH_VCCNZ %bb.25, implicit killed $vcc
+    S_BRANCH %bb.29
+
+  bb.29:
+    successors: %bb.30(0x40000000), %bb.52(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.52, implicit undef $scc
+    S_BRANCH %bb.30
+
+  bb.30:
+    successors: %bb.31(0x40000000), %bb.51(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.31
+
+  bb.31:
+    successors: %bb.51(0x40000000), %bb.32(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.32
+
+  bb.32:
+    successors: %bb.33(0x30000000), %bb.34(0x50000000)
+
+    %28:sreg_64 = S_AND_B64 $exec, 0, implicit-def dead $scc
+    $vcc = COPY killed %28
+    S_CBRANCH_VCCNZ %bb.34, implicit killed $vcc
+    S_BRANCH %bb.33
+
+  bb.33:
+    successors: %bb.34(0x80000000)
+
+
+  bb.34:
+    successors: %bb.36(0x40000000), %bb.35(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.36, implicit undef $scc
+    S_BRANCH %bb.35
+
+  bb.35:
+    successors: %bb.51(0x40000000), %bb.43(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.43
+
+  bb.36:
+    successors: %bb.37(0x80000000)
+
+
+  bb.37:
+    successors: %bb.39(0x30000000), %bb.38(0x50000000)
+
+    %30:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
+    $vcc = COPY killed %30
+    S_CBRANCH_VCCNZ %bb.39, implicit killed $vcc
+    S_BRANCH %bb.38
+
+  bb.38:
+    successors: %bb.41(0x40000000), %bb.42(0x40000000)
+
+    %32:sreg_64 = S_AND_B64 $exec, 0, implicit-def dead $scc
+    $vcc = COPY killed %32
+    S_CBRANCH_VCCNZ %bb.41, implicit killed $vcc
+    S_BRANCH %bb.42
+
+  bb.39:
+    successors: %bb.40(0x30000000), %bb.42(0x50000000)
+
+    %34:sreg_64 = S_AND_B64 $exec, 0, implicit-def dead $scc
+    $vcc = COPY killed %34
+    S_CBRANCH_VCCNZ %bb.42, implicit killed $vcc
+    S_BRANCH %bb.40
+
+  bb.40:
+    successors: %bb.51(0x40000000), %bb.43(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.43
+
+  bb.41:
+    successors: %bb.51(0x40000000), %bb.43(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.43
+
+  bb.42:
+    successors: %bb.43(0x80000000)
+
+
+  bb.43:
+    successors: %bb.44(0x40000000), %bb.51(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.44
+
+  bb.44:
+    successors: %bb.51(0x40000000), %bb.45(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.51, implicit undef $scc
+    S_BRANCH %bb.45
+
+  bb.45:
+    successors: %bb.50(0x40000000), %bb.46(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.50, implicit undef $scc
+    S_BRANCH %bb.46
+
+  bb.46:
+    successors: %bb.47(0x40000000), %bb.50(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.50, implicit undef $scc
+    S_BRANCH %bb.47
+
+  bb.47:
+    successors: %bb.49(0x40000000), %bb.48(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.49, implicit undef $scc
+    S_BRANCH %bb.48
+
+  bb.48:
+    successors: %bb.49(0x80000000)
+
+
+  bb.49:
+    successors: %bb.50(0x80000000)
+
+
+  bb.50:
+    successors: %bb.51(0x80000000)
+
+
+  bb.51:
+    successors: %bb.52(0x80000000)
+
+
+  bb.52:
+    S_ENDPGM
+
+...