Index: include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- include/llvm/CodeGen/TargetInstrInfo.h
+++ include/llvm/CodeGen/TargetInstrInfo.h
@@ -300,6 +300,13 @@
     return ~0U;
   }
 
+  /// Returns the original SrcReg unless it is the target of a copy-like
+  /// operation, in which case we chain backwards through all such operations
+  /// to the ultimate source register. If a physical register is encountered,
+  /// we stop the search.
+  virtual unsigned lookThruCopyLike(unsigned SrcReg,
+                                    const MachineRegisterInfo *MRI) const;
+
   /// Return true if the instruction is as cheap as a move instruction.
   ///
   /// Targets for different archs need to override this, and different
Index: lib/CodeGen/MachineLICM.cpp
===================================================================
--- lib/CodeGen/MachineLICM.cpp
+++ lib/CodeGen/MachineLICM.cpp
@@ -71,6 +71,10 @@
                        cl::desc("MachineLICM should sink instructions into "
                                 "loops to avoid register spills"),
                        cl::init(false), cl::Hidden);
+static cl::opt<bool>
+HoistConstStores("hoist-const-stores",
+                 cl::desc("Hoist invariant stores"),
+                 cl::init(true), cl::Hidden);
 
 STATISTIC(NumHoisted,
           "Number of machine instructions hoisted out of loops");
@@ -82,6 +86,8 @@
           "Number of hoisted machine instructions CSEed");
 STATISTIC(NumPostRAHoisted,
           "Number of machine instructions hoisted out of loops post regalloc");
+STATISTIC(NumStoreConst,
+          "Number of stores of const phys reg hoisted out of loops");
 
 namespace {
@@ -726,6 +732,10 @@
       MachineInstr *MI = &*MII;
       if (!Hoist(MI, Preheader))
         UpdateRegPressure(MI);
+      // If we have hoisted an instruction that may store, it can only be a
+      // constant store.
+      else if (MI->mayStore())
+        NumStoreConst++;
       MII = NextMII;
     }
 
@@ -889,13 +899,82 @@
   return false;
 }
 
+// This function iterates through all the operands of the input store MI and
+// checks that each register operand satisfies isCallerPreservedPhysReg.
+// This means that both the value being stored and the address it is stored to
+// are constant throughout the body of the function (not including the
+// prologue and epilogue). When called with an MI that isn't a store, it
+// returns false.
+static bool isInvariantStore(const MachineInstr &MI,
+                             const TargetRegisterInfo *TRI,
+                             const MachineRegisterInfo *MRI,
+                             const TargetInstrInfo *TII) {
+
+  if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
+      (MI.getNumOperands() == 0))
+    return false;
+
+  // Check that all register operands are caller-preserved physical registers.
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg()) {
+      unsigned Reg = MO.getReg();
+      // If the operand is a virtual register, check if it comes from a copy
+      // of a physical register.
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        Reg = TII->lookThruCopyLike(MO.getReg(), MRI);
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        return false;
+      if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
+        return false;
+    }
+  }
+  return true;
+}
+
+// Return true if the input MI is a copy instruction that feeds an invariant
+// store instruction. This means that the source of the copy has to satisfy
+// isCallerPreservedPhysReg and at least one of its users has to satisfy
+// isInvariantStore.
+static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
+                                        const MachineRegisterInfo *MRI,
+                                        const TargetRegisterInfo *TRI,
+                                        const TargetInstrInfo *TII) {
+
+  // FIXME: If targets would like to look through instructions that aren't
+  // pure copies, this can be updated to a query.
+  if (!MI.isCopy())
+    return false;
+
+  const MachineFunction *MF = MI.getMF();
+  // Check that we are copying a constant physical register.
+  unsigned CopySrcReg = MI.getOperand(1).getReg();
+  if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+    return false;
+
+  if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
+    return false;
+
+  unsigned CopyDstReg = MI.getOperand(0).getReg();
+  // Check if any of the uses of the copy are invariant stores.
+  assert(TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
+         "copy dst is not a virtual reg");
+
+  for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
+    if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI, TII))
+      return true;
+  }
+  return false;
+}
+
 /// Returns true if the instruction may be a suitable candidate for LICM.
 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
 bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
   // Check if it's safe to move the instruction.
   bool DontMoveAcrossStore = true;
-  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
+  if (!I.isSafeToMove(AA, DontMoveAcrossStore) &&
+      !(HoistConstStores && isInvariantStore(I, TRI, MRI, TII))) {
     return false;
+  }
 
   // If it is load then check if it is guaranteed to execute by making sure that
   // it dominates all exiting blocks. If it doesn't, then there is a path out of
@@ -1115,6 +1194,9 @@
   // - When hoisting the last use of a value in the loop, that value no longer
   //   needs to be live in the loop. This lowers register pressure in the loop.
 
+  if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI, TII))
+    return true;
+
   bool CheapInstr = IsCheapInstruction(MI);
   bool CreatesCopy = HasLoopPHIUse(&MI);
Index: lib/CodeGen/TargetInstrInfo.cpp
===================================================================
--- lib/CodeGen/TargetInstrInfo.cpp
+++ lib/CodeGen/TargetInstrInfo.cpp
@@ -309,6 +309,28 @@
   return !isPredicated(MI);
 }
 
+unsigned TargetInstrInfo::lookThruCopyLike(unsigned SrcReg,
+                                           const MachineRegisterInfo *MRI) const {
+  while (true) {
+    const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+    if (!MI->isCopyLike())
+      return SrcReg;
+
+    unsigned CopySrcReg;
+    if (MI->isCopy())
+      CopySrcReg = MI->getOperand(1).getReg();
+    else {
+      assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+      CopySrcReg = MI->getOperand(2).getReg();
+    }
+
+    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+      return CopySrcReg;
+
+    SrcReg = CopySrcReg;
+  }
+}
+
 bool TargetInstrInfo::PredicateInstruction(
     MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
   bool MadeChange = false;
Index: lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.h
+++ lib/Target/PowerPC/PPCInstrInfo.h
@@ -359,13 +359,6 @@
                           MachineInstr **KilledDef = nullptr) const;
   void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
 
-  // This is used to find the "true" source register for n
-  // Machine instruction. Returns the original SrcReg unless it is the target
-  // of a copy-like operation, in which case we chain backwards through all
-  // such operations to the ultimate source register. If a
-  // physical register is encountered, we stop the search.
-  static unsigned lookThruCopyLike(unsigned SrcReg,
-                                   const MachineRegisterInfo *MRI);
   bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
 };
Index: lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.cpp
+++ lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2151,28 +2151,6 @@
   return false;
 }
 
-unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg,
-                                        const MachineRegisterInfo *MRI) {
-  while (true) {
-    MachineInstr *MI = MRI->getVRegDef(SrcReg);
-    if (!MI->isCopyLike())
-      return SrcReg;
-
-    unsigned CopySrcReg;
-    if (MI->isCopy())
-      CopySrcReg = MI->getOperand(1).getReg();
-    else {
-      assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
-      CopySrcReg = MI->getOperand(2).getReg();
-    }
-
-    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
-      return CopySrcReg;
-
-    SrcReg = CopySrcReg;
-  }
-}
-
 // Essentially a compile-time implementation of a compare->isel sequence.
 // It takes two constants to compare, along with the true/false registers
 // and the comparison type (as a subreg to a CR field) and returns one
Index: lib/Target/PowerPC/PPCRegisterInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -65,6 +65,12 @@
 EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false),
                      cl::desc("Enable spills from gpr to vsr rather than stack"));
 
+static cl::opt<bool>
+StackPtrConst("ppc-stack-ptr-caller-preserved",
+              cl::desc("Consider R1 caller preserved so stack saves of "
+                       "caller preserved registers can be LICM candidates"),
+              cl::init(true), cl::Hidden);
+
 PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
                        TM.isPPC64() ? 0 : 1,
@@ -304,15 +310,26 @@
 bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
                                                const MachineFunction &MF) const {
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
-  if (TM.isELFv2ABI() && PhysReg == PPC::X2) {
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!TM.isPPC64())
+    return false;
+
+  if (!Subtarget.isSVR4ABI())
+    return false;
+  if (PhysReg == PPC::X2)
     // X2 is guaranteed to be preserved within a function if it is reserved.
     // The reason it's reserved is that it's the TOC pointer (and the function
     // uses the TOC). In functions where it isn't reserved (i.e. leaf functions
     // with no TOC access), we can't claim that it is preserved.
     return (getReservedRegs(MF).test(PPC::X2));
-  } else {
-    return false;
-  }
+  if (StackPtrConst && (PhysReg == PPC::X1) && !MFI.hasVarSizedObjects()
+      && !MFI.hasOpaqueSPAdjustment())
+    // The value of the stack pointer does not change within a function after
+    // the prologue and before the epilogue if there are no dynamic allocations
+    // and no inline asm which clobbers X1.
+    return true;
+  return false;
 }
 
 unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
Index: test/CodeGen/PowerPC/extra-toc-reg-deps.ll
===================================================================
--- test/CodeGen/PowerPC/extra-toc-reg-deps.ll
+++ test/CodeGen/PowerPC/extra-toc-reg-deps.ll
@@ -75,9 +75,7 @@
 ; CHECK: bctrl
 ; CHECK: ld 2, 40(1)
 
-; CHECK: addis [[REG1:[0-9]+]], 2, .LC0@toc@ha
 ; CHECK: std 2, 40(1)
-; CHECK: ld {{[0-9]+}}, .LC0@toc@l([[REG1]])
 ; CHECK: {{mr|ld}} 2,
 ; CHECK: mtctr
 ; CHECK: bctrl
Index: test/CodeGen/PowerPC/loop-hoist-toc-save.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/loop-hoist-toc-save.ll
@@ -0,0 +1,129 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s -check-prefix=CHECKBE
+
+; Test hoist out of a single loop
+define signext i32 @test1(i32 signext %lim, i32 (i32)* nocapture %Func) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: for.body.preheader
+; CHECK: std 2, 24(1)
+; CHECK: for.body
+; CHECK-NOT: std 2, 24(1)
+; CHECKBE-LABEL: test1
+; CHECKBE: for.body.preheader
+; CHECKBE: std 2, 40(1)
+; CHECKBE: for.body
+; CHECKBE-NOT: std 2, 40(1)
+
+  %cmp6 = icmp sgt i32 %lim, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %Sum.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %Sum.07 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call signext i32 %Func(i32 signext %i.08)
+  %add = add nsw i32 %call, %Sum.07
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, %lim
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Test that the hoist out of a nested loop goes to the outer loop preheader
+define signext i32 @test2(i32 signext %lim, i32 (i32)* nocapture %Func) {
+entry:
+; CHECK-LABEL: test2
+; CHECK: for.body4.lr.ph.preheader
+; CHECK: std 2, 24(1)
+; CHECK: for.body4.lr.ph
+; CHECK-NOT: std 2, 24(1)
+; CHECKBE-LABEL: test2
+; CHECKBE: for.body4.lr.ph.preheader
+; CHECKBE: std 2, 40(1)
+; CHECKBE: for.body4.lr.ph
+; CHECKBE-NOT: std 2, 40(1)
+
+  %cmp20 = icmp sgt i32 %lim, 0
+  br i1 %cmp20, label %for.body4.lr.ph.preheader, label %for.cond.cleanup
+
+for.body4.lr.ph.preheader:                        ; preds = %entry
+  br label %for.body4.lr.ph
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3, %entry
+  %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.cond.cleanup3 ]
+  ret i32 %Sum.0.lcssa
+
+for.body4.lr.ph:                                  ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
+  %j.022 = phi i32 [ %inc6, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
+  %Sum.021 = phi i32 [ %add, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
+  br label %for.body4
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  %inc6 = add nuw nsw i32 %j.022, 1
+  %exitcond24 = icmp eq i32 %inc6, %lim
+  br i1 %exitcond24, label %for.cond.cleanup, label %for.body4.lr.ph
+
+for.body4:                                        ; preds = %for.body4, %for.body4.lr.ph
+  %i.019 = phi i32 [ %j.022, %for.body4.lr.ph ], [ %inc, %for.body4 ]
+  %Sum.118 = phi i32 [ %Sum.021, %for.body4.lr.ph ], [ %add, %for.body4 ]
+  %call = tail call signext i32 %Func(i32 signext %i.019)
+  %add = add nsw i32 %call, %Sum.118
+  %inc = add nuw nsw i32 %i.019, 1
+  %exitcond = icmp eq i32 %inc, %lim
+  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
+}
+
+; Test hoist out of an if statement with low branch probability
+; FIXME: We shouldn't hoist in such cases as it could increase the number
+; of stores after hoisting.
+define signext i32 @test3(i32 signext %lim, i32 (i32)* nocapture %Func) {
+entry:
+; CHECK-LABEL: test3
+; CHECK: %for.body.lr.ph
+; CHECK: std 2, 24(1)
+; CHECK: %for.body
+; CHECK-NOT: std 2, 24(1)
+; CHECKBE-LABEL: test3
+; CHECKBE: %for.body.lr.ph
+; CHECKBE: std 2, 40(1)
+; CHECKBE: %for.body
+; CHECKBE-NOT: std 2, 40(1)
+
+  %cmp13 = icmp sgt i32 %lim, 0
+  br i1 %cmp13, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %sub = add nsw i32 %lim, -1
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %if.end, %entry
+  %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %if.end ]
+  ret i32 %Sum.0.lcssa
+
+for.body:                                         ; preds = %if.end, %for.body.lr.ph
+  %i.015 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+  %Sum.014 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %if.end ]
+  %cmp1 = icmp eq i32 %i.015, %sub
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %call = tail call signext i32 %Func(i32 signext %sub)
+  %add = add nsw i32 %call, %Sum.014
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  %Sum.1 = phi i32 [ %add, %if.then ], [ %Sum.014, %for.body ]
+  %call2 = tail call signext i32 @func(i32 signext %i.015)
+  %add3 = add nsw i32 %call2, %Sum.1
+  %inc = add nuw nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, %lim
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+declare signext i32 @func(i32 signext)
Index: test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
===================================================================
--- test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
+++ test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
@@ -15,7 +15,6 @@
 ; INVFUNCDESC-DAG: ld [[REG3:[0-9]+]], 0(3)
 
 ; INVFUNCDESC: %for.body
-; INVFUNCDESC: std 2, 40(1)
 ; INVFUNCDESC-DAG: mtctr [[REG3]]
 ; INVFUNCDESC-DAG: mr 11, [[REG2]]
 ; INVFUNCDESC-DAG: mr 2, [[REG1]]
@@ -24,7 +23,6 @@
 ; NONINVFUNCDESC-LABEL: @bar
 
 ; NONINVFUNCDESC: %for.body
-; NONINVFUNCDESC: std 2, 40(1)
 ; NONINVFUNCDESC-DAG: ld 3, 0(30)
 ; NONINVFUNCDESC-DAG: ld 11, 16(30)
 ; NONINVFUNCDESC-DAG: ld 2, 8(30)
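
Note (not part of the patch): the new hoisting in MachineLICM is driven entirely by
TargetRegisterInfo::isCallerPreservedPhysReg, so other targets can opt in by overriding
that hook, as this patch does for PPC::X2 and PPC::X1. Below is a minimal sketch of such
an override; the target name XYZ and the register XYZ::BASE are hypothetical and only
illustrate the shape of the hook.

// Hypothetical override for an out-of-tree target "XYZ". Reporting a physical
// register as caller preserved lets isInvariantStore/isCopyFeedingInvariantStore
// treat in-loop stores of that register (e.g. a save of a pinned base register
// to a fixed stack slot) as invariant and hoist them to the loop preheader when
// -hoist-const-stores is enabled.
bool XYZRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
                                               const MachineFunction &MF) const {
  assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
  // Only claim registers whose value is genuinely fixed for the whole body of
  // the function (prologue and epilogue excluded), mirroring the X2 logic above.
  if (PhysReg == XYZ::BASE)
    return getReservedRegs(MF).test(XYZ::BASE);
  return false;
}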