Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -128,7 +128,7 @@
                                       bool AllowVRegs = false) const {
     return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
            (MI.getDesc().isRematerializable() &&
-            (isReallyTriviallyReMaterializable(MI, AA) ||
+            (isReallyTriviallyReMaterializable(MI, AA, AllowVRegs) ||
             isReallyTriviallyReMaterializableGeneric(MI, AA, AllowVRegs)));
   }
 
@@ -147,7 +147,8 @@
   /// not always available.
   /// Requirements must be checked as stated in isTriviallyReMaterializable().
   virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                                 AAResults *AA) const {
+                                                 AAResults *AA,
+                                                 bool AllowVRegs) const {
     return false;
   }
 
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -178,7 +178,8 @@
   }
 
   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                         AAResults *AA) const override;
+                                         AAResults *AA,
+                                         bool AllowVRegs) const override;
 
   bool isIgnorableUse(const MachineOperand &MO) const override;
 
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -107,7 +107,8 @@
 }
 
 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                                    AAResults *AA) const {
+                                                    AAResults *AA,
+                                                    bool AllowVRegs) const {
   if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) {
     // Normally VALU use of exec would block the rematerialization, but that
     // is OK in this case to have an implicit exec read as all VALU do.
@@ -116,9 +117,20 @@
     // Another potential implicit use is mode register. The core logic of
     // the RA will not attempt rematerialization if mode is set anywhere
     // in the function, otherwise it is safe since mode is not changed.
-    return !MI.hasImplicitDef() &&
-           MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
-           !MI.mayRaiseFPException();
+    if (MI.hasImplicitDef() ||
+        MI.getNumImplicitOperands() != MI.getDesc().getNumImplicitUses() ||
+        MI.mayRaiseFPException())
+      return false;
+
+    if (AllowVRegs)
+      return true;
+
+    for (const MachineOperand &MO : MI.operands()) {
+      if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+        return false;
+    }
+
+    return true;
   }
 
   return false;
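
A note on intended usage (a minimal sketch under stated assumptions, not part
of the patch itself; the helper name and surrounding pass context are
hypothetical). A pre-RA pass opts in to virtual-register uses by passing
AllowVRegs = true to the public isTriviallyReMaterializable() entry point
above, which now forwards the flag to the target hook; existing callers keep
the default of false and see no change in behavior:

  // Hypothetical pre-RA helper: may MI be rematerialized at a use point even
  // though its register inputs are still virtual registers?
  static bool canRematWithVRegUses(const MachineInstr &MI,
                                   const TargetInstrInfo *TII, AAResults *AA) {
    // AllowVRegs = true is only sound before register allocation, while the
    // virtual-register defs MI depends on are still available to recompute it.
    return TII->isTriviallyReMaterializable(MI, AA, /*AllowVRegs=*/true);
  }
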
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -480,7 +480,8 @@
                                 const TargetInstrInfo *TII) const;
 
   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                         AAResults *AA) const override;
+                                         AAResults *AA,
+                                         bool AllowVRegs) const override;
 
 private:
   /// Modeling special VFP / NEON fp MLA / MLS hazards.
 
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6493,7 +6493,7 @@
 }
 
 bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                                         AAResults *AA) const {
+                                                         AAResults *AA, bool AllowVRegs) const {
   // Try hard to rematerialize any VCTPs because if we spill P0, it will block
   // the tail predication conversion. This means that the element count
   // register has to be live for longer, but that has to be better than
@@ -6515,4 +6515,3 @@
   return (MF.getSubtarget().hardenSlsBlr()) ? ARM::BLX_pred_noip
                                             : ARM::BLX_pred;
 }
-
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -401,7 +401,8 @@
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                         AAResults *AA) const override;
+                                         AAResults *AA,
+                                         bool AllowVRegs) const override;
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
 
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1087,7 +1087,8 @@
 // For opcodes with the ReMaterializable flag set, this function is called to
 // verify the instruction is really rematable.
 bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                                     AliasAnalysis *AA) const {
+                                                     AliasAnalysis *AA,
+                                                     bool AllowVRegs) const {
   switch (MI.getOpcode()) {
   default:
     // This function should only be called for opcodes with the ReMaterializable
 
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -44,7 +44,8 @@
   const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
 
   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                         AAResults *AA) const override;
+                                         AAResults *AA,
+                                         bool AllowVRegs) const override;
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
 
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -40,7 +40,7 @@
       RI(STI.getTargetTriple()) {}
 
 bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable(
-    const MachineInstr &MI, AAResults *AA) const {
+    const MachineInstr &MI, AAResults *AA, bool AllowVRegs) const {
   switch (MI.getOpcode()) {
   case WebAssembly::CONST_I32:
   case WebAssembly::CONST_I64:
 
Index: llvm/lib/Target/X86/X86InstrInfo.h
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.h
+++ llvm/lib/Target/X86/X86InstrInfo.h
@@ -233,7 +233,8 @@
                              int &FrameIndex) const override;
 
   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                         AAResults *AA) const override;
+                                         AAResults *AA,
+                                         bool AllowVRegs) const override;
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      Register DestReg, unsigned SubIdx,
                      const MachineInstr &Orig,
 
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -964,7 +964,8 @@
 }
 
 bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
-                                                     AAResults *AA) const {
+                                                     AAResults *AA,
+                                                     bool AllowVRegs) const {
   switch (MI.getOpcode()) {
   default:
     // This function should only be called for opcodes with the ReMaterializable
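
For PowerPC, WebAssembly, and X86 the change is signature-only: those overrides
decide purely by opcode and may ignore the new flag. A minimal sketch of that
shape under stated assumptions (FooInstrInfo and Foo::MOV_IMM are hypothetical
names, not from this patch):

  bool FooInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                       AAResults *AA,
                                                       bool AllowVRegs) const {
    // AllowVRegs is intentionally unused: every opcode accepted below takes
    // no register inputs, so virtual-register uses cannot affect the answer.
    switch (MI.getOpcode()) {
    case Foo::MOV_IMM: // materializes a constant; no register uses
      return true;
    default:
      return false;
    }
  }
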
Index: llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
+++ llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
-# FIXME: MachineLICM hoists all V_CVT instructions out of the loop increasing
-# register pressure. VGPR budget at occupancy 10 is 24 vgprs.
+# MachineLICM shall limit hoisting of V_CVT instructions out of the loop to keep
+# register pressure within the budget. VGPR budget at occupancy 10 is 24 vgprs.
 
 ---
 name: test
 
@@ -35,41 +35,41 @@
     ; GCN: %20:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY2]], implicit $mode, implicit $exec
     ; GCN: %21:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY3]], implicit $mode, implicit $exec
     ; GCN: %22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY4]], implicit $mode, implicit $exec
+    ; GCN: bb.1:
+    ; GCN: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+    ; GCN: liveins: $vcc
+    ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %18, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %19, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
     ; GCN: %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %23, implicit $exec
     ; GCN: %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %24, implicit $exec
     ; GCN: %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %25, implicit $exec
     ; GCN: %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %26, implicit $exec
     ; GCN: %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %27, implicit $exec
     ; GCN: %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %28, implicit $exec
     ; GCN: %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %29, implicit $exec
     ; GCN: %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %30, implicit $exec
     ; GCN: %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %31, implicit $exec
     ; GCN: %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %32, implicit $exec
     ; GCN: %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %33, implicit $exec
     ; GCN: %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %34, implicit $exec
     ; GCN: %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
-    ; GCN: bb.1:
-    ; GCN: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
-    ; GCN: liveins: $vcc
-    ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %18, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %19, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %23, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %24, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %25, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %26, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %27, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %28, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %29, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %30, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %31, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %32, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %33, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %34, implicit $exec
-    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, %35, implicit $exec
+    ; GCN: $vcc = V_CMP_EQ_U64_e64 $vcc, killed %35, implicit $exec
     ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
     ; GCN: S_BRANCH %bb.2
     ; GCN: bb.2:
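
Note on the expected checks (the arithmetic is inferred from the MIR above, not
stated in the patch): the loop consumes 18 vreg_64 convert results, %18 through
%35, and each vreg_64 occupies two 32-bit VGPRs, so hoisting every V_CVT would
keep 2 * 18 = 36 VGPRs live across the loop, well over the 24-VGPR budget at
occupancy 10. The updated checks therefore expect only %18 through %22 to be
hoisted, while %23 onward stay in the loop and are killed by their single V_CMP
use.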