Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -85,6 +85,16 @@ const TargetRegisterInfo *TRI, const MachineFunction &MF) const; + /// Result of a rematerializability query. An instruction that also defines + /// extra physical registers may still be rematerialized wherever those + /// registers are dead; YES_BUT_EXTRA_PHYSREG_DEFS reports that case. + enum class Rematerializability + { + NO = 0, + YES_BUT_EXTRA_PHYSREG_DEFS = 1, + YES = 2 + }; + /// Return true if the instruction is trivially rematerializable, meaning it /// has no side effects and requires no operands that aren't always available. /// This means the only allowed uses are constants and unallocatable physical @@ -92,10 +102,23 @@ /// in the function. bool isTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA = nullptr) const { - return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || - (MI.getDesc().isRematerializable() && - (isReallyTriviallyReMaterializable(MI, AA) || - isReallyTriviallyReMaterializableGeneric(MI, AA))); + return (isPotentiallyTriviallyReMaterializable(MI, AA) + == Rematerializability::YES); + } + + /// More general form of isTriviallyReMaterializable that additionally + /// reports instructions with extra physreg defs, which can only be + /// rematerialized where those registers are dead.
+ Rematerializability + isPotentiallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA = nullptr) const { + if(MI.getOpcode() == TargetOpcode::IMPLICIT_DEF) + return Rematerializability::YES; + if(!MI.getDesc().isRematerializable()) + return Rematerializability::NO; + if(isReallyTriviallyReMaterializable(MI, AA)) + return Rematerializability::YES; + return isReallyPotentiallyTriviallyReMaterializableGeneric(MI, AA); } protected: @@ -149,8 +172,9 @@ /// set and the target hook isReallyTriviallyReMaterializable returns false, /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. - bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AliasAnalysis *AA) const; + Rematerializability + isReallyPotentiallyTriviallyReMaterializableGeneric(const MachineInstr &MI, + AliasAnalysis *AA) const; public: /// These methods return the opcode of the frame setup/destroy instructions Index: lib/CodeGen/CalcSpillWeights.cpp =================================================================== --- lib/CodeGen/CalcSpillWeights.cpp +++ lib/CodeGen/CalcSpillWeights.cpp @@ -129,7 +129,8 @@ } } - if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) + if (TII.isPotentiallyTriviallyReMaterializable(*MI, LIS.getAliasAnalysis()) + == TargetInstrInfo::Rematerializability::NO) return false; } return true; Index: lib/CodeGen/LiveRangeEdit.cpp =================================================================== --- lib/CodeGen/LiveRangeEdit.cpp +++ lib/CodeGen/LiveRangeEdit.cpp @@ -73,7 +73,8 @@ AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); ScannedRemattable = true; - if (!TII.isTriviallyReMaterializable(*DefMI, aa)) + if (TII.isPotentiallyTriviallyReMaterializable(*DefMI, aa) + == TargetInstrInfo::Rematerializability::NO) return false; Remattable.insert(VNI); return true; @@ -155,6 +156,39 @@ if (cheapAsAMove && !TII.isAsCheapAsAMove(*RM.OrigMI)) return false; + // 
The instruction passed the checkRematerializable criteria. Now that we + // know the context, make sure it does not def any additional live + // registers. Treat anything but YES as suspect: this query has no AA. + if (TII.isPotentiallyTriviallyReMaterializable(*RM.OrigMI, nullptr) + != TargetInstrInfo::Rematerializability::YES) + { + for (const MachineOperand &MO : RM.OrigMI->operands()) { + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + + // Check for a well-behaved physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A physreg def. We need to make sure the register is dead. + SlotIndexes *Indexes = nullptr; + MachineInstr *Instruction = nullptr; + MachineBasicBlock *BasicBlock = nullptr; + + // The interaction with the register allocator isn't entirely clear + // to me, so to be on the safe side, never assume registers to be + // dead if they are allocatable. + if (MRI.isAllocatable(Reg) || + !(Indexes = LIS.getSlotIndexes()) || + !(Instruction = Indexes->getInstructionFromIndex(UseIdx)) || + !(BasicBlock = Instruction->getParent()) || + (BasicBlock->computeRegisterLiveness( + MRI.getTargetRegisterInfo(), Reg, Instruction) + != MachineBasicBlock::LivenessQueryResult::LQR_Dead)) { + return false; + } + } + } + } + // Verify that all used registers are available with the same values. if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) return false; Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -1213,7 +1213,8 @@ // Rematerializable instructions should always be hoisted since the register // allocator can just pull them down again when needed. 
- if (TII->isTriviallyReMaterializable(MI, AA)) + if (TII->isPotentiallyTriviallyReMaterializable(MI, AA) + != TargetInstrInfo::Rematerializability::NO) return true; // FIXME: If there are long latency loop-invariant instructions inside the @@ -1266,7 +1267,8 @@ // High register pressure situation, only hoist if the instruction is going // to be remat'ed. - if (!TII->isTriviallyReMaterializable(MI, AA) && + if (TII->isPotentiallyTriviallyReMaterializable(MI, AA) + == TargetInstrInfo::Rematerializability::NO && !MI.isDereferenceableInvariantLoad(AA)) { LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -871,14 +871,15 @@ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); } -bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( +TargetInstrInfo::Rematerializability +TargetInstrInfo::isReallyPotentiallyTriviallyReMaterializableGeneric( const MachineInstr &MI, AliasAnalysis *AA) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. if (!MI.getNumOperands() || !MI.getOperand(0).isReg()) - return false; + return Rematerializability::NO; unsigned DefReg = MI.getOperand(0).getReg(); // A sub-register definition can only be rematerialized if the instruction @@ -887,7 +888,7 @@ // moved safely. if (TargetRegisterInfo::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg)) - return false; + return Rematerializability::NO; // A load from a fixed stack slot can be rematerialized. 
This may be // redundant with subsequent checks, but it's target-independent, @@ -895,20 +896,24 @@ int FrameIdx = 0; if (isLoadFromStackSlot(MI, FrameIdx) && MF.getFrameInfo().isImmutableObjectIndex(FrameIdx)) - return true; + return Rematerializability::YES; // Avoid instructions obviously unsafe for remat. if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects()) - return false; + return Rematerializability::NO; // Don't remat inline asm. We have no idea how expensive it is // even if it's side effect free. if (MI.isInlineAsm()) - return false; + return Rematerializability::NO; // Avoid instructions which load from potentially varying memory. if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)) - return false; + return Rematerializability::NO; + + // Track whether the instruction defines any additional physical registers. + // If they are dead at the rematerialization point, remat is still OK. + bool AdditionalDefs = false; // If any of the registers accessed are non-constant, conservatively assume // the instruction is not rematerializable. @@ -926,10 +931,11 @@ // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. if (!MRI.isConstantPhysReg(Reg)) - return false; + return Rematerializability::NO; } else { - // A physreg def. We can't remat it. - return false; + // A physreg def. If the register is dead, we can still rematerialize. + // This will be checked in LiveRangeEdit::canRematerializeAt. + AdditionalDefs = true; } continue; } @@ -937,17 +943,18 @@ // Only allow one virtual-register def. There may be multiple defs of the // same virtual register, though. if (MO.isDef() && Reg != DefReg) - return false; + return Rematerializability::NO; // Don't allow any virtual-register uses. Rematting an instruction with // virtual register uses would length the live ranges of the uses, which // is not necessarily a good idea, certainly not "trivial". 
if (MO.isUse()) - return false; + return Rematerializability::NO; } // Everything checked out. - return true; + return AdditionalDefs ? Rematerializability::YES_BUT_EXTRA_PHYSREG_DEFS + : Rematerializability::YES; } int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const { Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== --- lib/Target/ARM/ARMInstrThumb.td +++ lib/Target/ARM/ARMInstrThumb.td @@ -1153,7 +1153,7 @@ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Move register -let isMoveImm = 1 in +let isMoveImm = 1, isReMaterializable = 1 in def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, "mov", "\t$Rd, $imm8", [(set tGPR:$Rd, imm0_255:$imm8)]>, Index: test/CodeGen/Thumb/movi8remat.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb/movi8remat.mir @@ -0,0 +1,117 @@ +# RUN: llc -start-before greedy %s -o - | FileCheck %s +--- | + ; ModuleID = 'movi8remat.ll' + source_filename = "movi8remat_test.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-apple--eabi" + + declare void @consume_value(i32) + + declare i32 @get_value(...) + + declare void @consume_five_values(i32, i32, i32, i32, i32) + + define void @this_spills_the_immediate_constant() { + tail call void @consume_value(i32 42) + %1 = tail call i32 (...) @get_value() + %2 = tail call i32 (...) @get_value() + %3 = tail call i32 (...) @get_value() + %4 = tail call i32 (...) @get_value() + %5 = tail call i32 (...) @get_value() + tail call void @consume_value(i32 42) + tail call void @consume_five_values(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) + ret void + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + +... 
+--- +name: this_spills_the_immediate_constant +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr, preferred-register: '' } + - { id: 1, class: tgpr, preferred-register: '' } + - { id: 2, class: tgpr, preferred-register: '' } + - { id: 3, class: tgpr, preferred-register: '' } + - { id: 4, class: tgpr, preferred-register: '' } + - { id: 5, class: tgpr, preferred-register: '' } + - { id: 6, class: tgpr, preferred-register: '' } +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 4 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0 (%ir-block.0): + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %0:tgpr, dead $cpsr = tMOVi8 42, 14, $noreg + $r0 = COPY %0 + tBL 14, $noreg, @consume_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBL 14, $noreg, @get_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %1:tgpr = COPY killed $r0 + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBL 14, $noreg, @get_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %2:tgpr = COPY killed $r0 + ADJCALLSTACKDOWN 0, 0, 14, $noreg, 
implicit-def dead $sp, implicit $sp + tBL 14, $noreg, @get_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %3:tgpr = COPY killed $r0 + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBL 14, $noreg, @get_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %4:tgpr = COPY killed $r0 + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBL 14, $noreg, @get_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %5:tgpr = COPY killed $r0 + ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + $r0 = COPY %0 + tBL 14, $noreg, @consume_value, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 4, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + %6:tgpr = COPY $sp + tSTRi %5, %6, 0, 14, $noreg :: (store 4 into stack) + $r0 = COPY %1 + $r1 = COPY %2 + $r2 = COPY %3 + $r3 = COPY %4 + tBL 14, $noreg, @consume_five_values, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit killed $r2, implicit killed $r3, implicit-def $sp + ADJCALLSTACKUP 4, 0, 14, $noreg, implicit-def dead $sp, implicit $sp + tBX_RET 14, $noreg + +... + +# CHECK: movs r0, #42 +# CHECK: movs r0, #42