Index: llvm/lib/CodeGen/RegAllocFast.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocFast.cpp
+++ llvm/lib/CodeGen/RegAllocFast.cpp
@@ -263,6 +263,37 @@
   return FrameIdx;
 }
 
+/// If \p VirtReg is defined by exactly one instruction, return that
+/// instruction. Returns null if it has no defining instruction or several.
+static const MachineInstr *getOneDefInstr(const MachineRegisterInfo &MRI,
+                                          Register VirtReg) {
+  MachineRegisterInfo::def_instr_iterator DI = MRI.def_instr_begin(VirtReg);
+  if (DI == MRI.def_instr_end())
+    return nullptr;
+
+  const MachineInstr *FirstDef = &*DI;
+  return ++DI == MRI.def_instr_end() ? FirstDef : nullptr;
+}
+
+/// Returns true if a scan of \p MBB from its first instruction reaches \p A no
+/// later than \p B, i.e. \p A precedes \p B or is \p B itself. Also returns
+/// true if \p B is the end of \p MBB.
+static bool dominates(MachineBasicBlock &MBB,
+                      MachineBasicBlock::const_iterator A,
+                      MachineBasicBlock::const_iterator B) {
+  const MachineBasicBlock::const_iterator MBBEnd = MBB.end();
+  if (B == MBBEnd)
+    return true;
+
+  // Walk forward until either instruction is found. Bounding the walk by the
+  // block end keeps it from running off the block if neither is in \p MBB.
+  MachineBasicBlock::const_iterator I = MBB.begin();
+  while (I != MBBEnd && I != A && I != B)
+    ++I;
+
+  return I != MBBEnd && I == A;
+}
+
 /// Returns false if \p VirtReg is known to not live out of the current block.
 bool RegAllocFast::mayLiveOut(Register VirtReg) {
   if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
@@ -270,11 +301,16 @@
     return !MBB->succ_empty();
   }
 
-  // If this block loops back to itself, it would be necessary to check whether
-  // the use comes after the def.
+  const MachineInstr *SelfLoopDef = nullptr;
+
+  // If this block loops back to itself, it is necessary to check whether the
+  // use comes after the def.
   if (MBB->isSuccessor(MBB)) {
-    MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
-    return true;
+    SelfLoopDef = getOneDefInstr(*MRI, VirtReg);
+    if (!SelfLoopDef) {
+      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+      return true;
+    }
   }
 
   // See if the first \p Limit uses of the register are all in the current
@@ -287,6 +323,16 @@
       // Cannot be live-out if there are no successors.
return !MBB->succ_empty(); } + + if (SelfLoopDef) { + // Try to handle some simple cases to avoid spilling and reloading every + // value inside a self looping block. + if (SelfLoopDef == &UseInst || + !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } + } } return false; Index: llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir @@ -0,0 +1,185 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: self_loop_single_def_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_single_def_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 
0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... + +--- +name: self_loop_multi_def +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_multi_def + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... + +# There's a single def inside the self loop, but it's also a use. 
+ +--- +name: self_loop_def_use_same_inst +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_def_use_same_inst + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec + ; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:vgpr_32 = V_ADD_U32_e32 1, undef %1, implicit $exec + GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... 
+ +--- +name: self_loop_def_after_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_def_after_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + GLOBAL_STORE_DWORD %0, undef %1, 0, 0, 0, 0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e64 1, 1, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +... 
+ +--- +name: self_loop_single_subreg_def_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: self_loop_single_subreg_def_use + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3 + ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec + ; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5) + ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:vreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, 0, 0, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + + bb.2: + S_ENDPGM 0 + +...