diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@
     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
 
+    bool isTriviallyReMaterializable(const MachineInstr &MI,
+                                     AAResults *AA) const;
+
     void EnterScope(MachineBasicBlock *MBB);
 
     void ExitScope(MachineBasicBlock *MBB);
 
@@ -659,6 +662,23 @@
   return true;
 }
 
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though such an instruction is rematerializable,
+/// RA might not actually rematerialize it in this scenario. In that case we
+/// do not want to hoist it out of the loop, assuming RA will sink it back.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+                                                  AAResults *AA) const {
+  if (!TII->isTriviallyReMaterializable(MI, AA))
+    return false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+      return false;
+  }
+
+  return true;
+}
+
 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
@@ -1156,9 +1176,9 @@
     return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
+  // Rematerializable instructions should always be hoisted, provided the
+  // register allocator can just pull them down again when needed.
+  if (isTriviallyReMaterializable(MI, AA))
     return true;
 
   // FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1231,7 @@
 
   // High register pressure situation, only hoist if the instruction is going
   // to be remat'ed.
-  if (!TII->isTriviallyReMaterializable(MI, AA) &&
+  if (!isTriviallyReMaterializable(MI, AA) &&
       !MI.isDereferenceableInvariantLoad(AA)) {
     LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;
diff --git a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
--- a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
-# FIXME: MachineLICM hoists all V_CVT instructions out of the loop increasing
-# register pressure. VGPR budget at occupancy 10 is 24 vgprs.
+# MachineLICM should limit hoisting of V_CVT instructions out of the loop to
+# keep register pressure within budget (24 VGPRs at occupancy 10).
 
 ---
 name:            test
@@ -35,41 +35,41 @@
     ; GCN: %20:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY2]], implicit $mode, implicit $exec
     ; GCN: %21:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY3]], implicit $mode, implicit $exec
     ; GCN: %22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY4]], implicit $mode, implicit $exec
+    ; GCN: bb.1:
+    ; GCN: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+    ; GCN: liveins: $vcc
+    ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %18, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %19, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
     ; GCN: %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %23, implicit $exec
     ; GCN: %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %24, implicit $exec
     ; GCN: %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %25, implicit $exec
     ; GCN: %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %26, implicit $exec
     ; GCN: %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %27, implicit $exec
     ; GCN: %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %28, implicit $exec
     ; GCN: %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %29, implicit $exec
     ; GCN: %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %30, implicit $exec
     ; GCN: %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %31, implicit $exec
     ; GCN: %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %32, implicit $exec
     ; GCN: %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %33, implicit $exec
     ; GCN: %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %34, implicit $exec
     ; GCN: %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
-    ; GCN: bb.1:
-    ; GCN: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
-    ; GCN: liveins: $vcc
-    ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %18, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %19, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %23, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %24, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %25, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %26, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %27, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %28, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %29, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %30, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %31, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %32, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %33, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %34, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %35, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %35, implicit $exec
     ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
     ; GCN: S_BRANCH %bb.2
     ; GCN: bb.2:
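
For illustration, a minimal standalone sketch of the decision the new helper encodes follows. This is not LLVM code: Operand, Instr, and looksRematSafeForHoisting are hypothetical stand-ins for MachineOperand, MachineInstr, and the new MachineLICMBase::isTriviallyReMaterializable, with TII->isTriviallyReMaterializable modeled as a plain flag.

// Hypothetical model, not LLVM's types: operands are reduced to three flags,
// and target-level rematerializability is modeled as a boolean on the
// instruction.
#include <iostream>
#include <vector>

struct Operand {
  bool IsReg;
  bool IsUse;
  bool IsVirtual;
};

struct Instr {
  bool TriviallyRemat; // stand-in for TII->isTriviallyReMaterializable
  std::vector<Operand> Operands;
};

// Mirrors the logic of the patch: an instruction only counts as remat-safe
// for hoisting if it is trivially rematerializable AND reads no virtual
// registers. If it reads a virtual register, RA might not rematerialize it
// at the use point, so hoisting it would raise in-loop register pressure
// with no guarantee of it being sunk back.
static bool looksRematSafeForHoisting(const Instr &MI) {
  if (!MI.TriviallyRemat)
    return false;
  for (const Operand &MO : MI.Operands)
    if (MO.IsReg && MO.IsUse && MO.IsVirtual)
      return false;
  return true;
}

int main() {
  // Like V_CVT_F64_I32_e32 %n in the test: rematerializable, but it reads a
  // virtual register, so it no longer qualifies for unconditional hoisting.
  Instr Cvt{true, {{/*IsReg=*/true, /*IsUse=*/true, /*IsVirtual=*/true}}};
  // Like a constant materialization with no register uses: still hoisted.
  Instr MovImm{true, {}};
  std::cout << looksRematSafeForHoisting(Cvt) << '\n';    // prints 0
  std::cout << looksRematSafeForHoisting(MovImm) << '\n'; // prints 1
}

This matches the MIR test above: with the patch, the V_CVT_F64_I32_e32 instructions that consume loop-defined virtual registers stay in bb.1 next to their V_CMP_EQ_U64_e64 users instead of being hoisted into the preheader.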