Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -1129,6 +1129,11 @@ /// Return true for post-incremented instructions. virtual bool isPostIncrement(const MachineInstr &MI) const { return false; } + /// Return true for invariant store instructions that can be hoisted. A store + /// is invariant if all the values in all register operands are the same + /// throughout the body of the function (not including prologue and epilogue). + virtual bool isInvariantStore(const MachineInstr &MI) const { return false; } + /// Returns true if the instruction is already predicated. virtual bool isPredicated(const MachineInstr &MI) const { return false; } Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -71,6 +71,10 @@ cl::desc("MachineLICM should sink instructions into " "loops to avoid register spills"), cl::init(false), cl::Hidden); +static cl::opt<bool> +HoistConstStores("hoist-const-stores", + cl::desc("Hoist invariant stores"), + cl::init(false), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -82,6 +86,8 @@ "Number of hoisted machine instructions CSEed"); STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); +STATISTIC(NumStoreConst, + "Number of stores of const phys reg hoisted out of loops"); namespace { @@ -708,6 +714,10 @@ MachineInstr *MI = &*MII; if (!Hoist(MI, Preheader)) UpdateRegPressure(MI); + // If we have hoisted an instruction that may store, it can only be a + // constant store. + else if (MI->mayStore()) + NumStoreConst++; MII = NextMII; } @@ -876,8 +886,10 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. 
bool DontMoveAcrossStore = true; - if (!I.isSafeToMove(AA, DontMoveAcrossStore)) + if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) && + !(HoistConstStores && I.mayStore() && TII->isInvariantStore(I))) { return false; + } // If it is load then check if it is guaranteed to execute by making sure that // it dominates all exiting blocks. If it doesn't, then there is a path out of @@ -1077,6 +1089,39 @@ RP[RPIdAndCost.first] += RPIdAndCost.second; } +// Return true if the input MI is a copy instruction that feeds an +// invariant store instruction. This means that the src of the copy +// has to satisfy isCallerPreservedPhysReg and at least one of its +// users should satisfy isInvariantStore. +static bool isCopyFeedingInvariantStore(const MachineInstr &MI) { + + if (!MI.isCopy()) + return false; + + const MachineFunction *MF = MI.getMF(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + // Check that we are copying a constant physical register. + unsigned CopySrcReg = MI.getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + return false; + + if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF)) + return false; + + unsigned CopyDstReg = MI.getOperand(0).getReg(); + // Check if any of the uses of the copy are invariant stores. + assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) && + "copy dst is not a virtual reg"); + + for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { + if (UseMI.mayStore() && TII->isInvariantStore(UseMI)) + return true; + } + return false; +} + /// Return true if it is potentially profitable to hoist the given loop /// invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { @@ -1095,6 +1140,9 @@ // - When hoisting the last use of a value in the loop, that value no longer // needs to be live in the loop. This lowers register pressure in the loop. 
+ if (HoistConstStores && isCopyFeedingInvariantStore(MI)) + return true; + bool CheapInstr = IsCheapInstruction(MI); bool CreatesCopy = HasLoopPHIUse(&MI); Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -157,6 +157,9 @@ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + /// Return true for invariant store instructions that can be hoisted. + bool isInvariantStore(const MachineInstr &MI) const override; + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -300,6 +300,57 @@ return 0; } +// This is used to find the "true" source register for a +// Machine instruction. Returns the original SrcReg unless it is the target +// of a copy-like operation, in which case we chain backwards through all +// such operations to the ultimate source register. If a +// physical register is encountered, we stop the search. +static unsigned lookThruCopyLike(unsigned SrcReg, const MachineFunction *MF) { + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + while (true) { + MachineInstr *MI = MRI->getVRegDef(SrcReg); + if (!MI->isCopyLike()) + return SrcReg; + + unsigned CopySrcReg; + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + return CopySrcReg; + + SrcReg = CopySrcReg; + } +} + +// This function iterates through all the operands of the input store MI and +// checks that each register operand satisfies isCallerPreservedPhysReg. 
+// This means, the value being stored and the address where it is being stored +// is constant throughout the body of the function. +bool PPCInstrInfo::isInvariantStore(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getMF(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + // Check that all register operands are caller-preserved physical registers. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + // If operand is a virtual register, check if it comes from a copy of a + // physical register. + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = lookThruCopyLike(MO.getReg(), MI.getMF()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF())) + return false; + } + } + return true; +} + // For opcodes with the ReMaterializable flag set, this function is called to // verify the instruction is really rematable. bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -65,6 +65,12 @@ EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false), cl::desc("Enable spills from gpr to vsr rather than stack")); +static cl::opt<bool> +StackPtrConst("ppc-stack-ptr-caller-preserved", + cl::desc("Consider R1 caller preserved so stack saves of " + "caller preserved registers can be LICM candidates"), + cl::init(false), cl::Hidden); + PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR, TM.isPPC64() ? 
0 : 1, @@ -286,15 +292,22 @@ bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - if (TM.isELFv2ABI() && PhysReg == PPC::X2) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!TM.isELFv2ABI()) + return false; + if (PhysReg == PPC::X2) // X2 is guaranteed to be preserved within a function if it is reserved. // The reason it's reserved is that it's the TOC pointer (and the function // uses the TOC). In functions where it isn't reserved (i.e. leaf functions // with no TOC access), we can't claim that it is preserved. return (getReservedRegs(MF).test(PPC::X2)); - } else { - return false; - } + if (StackPtrConst && (PhysReg == PPC::X1) && !MFI.hasVarSizedObjects() + && !MFI.hasOpaqueSPAdjustment()) + // The value of the stack pointer does not change within a function after + // the prologue and before the epilogue if there are no dynamic allocations + // and no inline asm which clobbers X1. 
+ return true; + return false; } unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, Index: test/CodeGen/PowerPC/loop-hoist-toc-save.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/loop-hoist-toc-save.ll @@ -0,0 +1,110 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s + +; Test hoist out of single loop +define signext i32 @test1(i32 signext %lim, i32 (i32)* nocapture %Func) { +entry: +; CHECK-LABEL: test1 +; CHECK: for.body.preheader +; CHECK: std 2, 24(1) +; CHECK: for.body +; CHECK-NOT: std 2, 24(1) + %cmp6 = icmp sgt i32 %lim, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %Sum.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %Sum.07 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %call = tail call signext i32 %Func(i32 signext %i.08) + %add = add nsw i32 %call, %Sum.07 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %lim + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; Test hoist of nested loop goes to outer loop preheader +define signext i32 @test2(i32 signext %lim, i32 (i32)* nocapture %Func) { +entry: +; CHECK-LABEL: test2 +; CHECK: for.body4.lr.ph.preheader +; CHECK: std 2, 24(1) +; CHECK: for.body4.lr.ph +; CHECK-NOT: std 2, 24(1) + %cmp20 = icmp sgt i32 %lim, 0 + br i1 %cmp20, label %for.body4.lr.ph.preheader, label %for.cond.cleanup + +for.body4.lr.ph.preheader: ; preds = %entry + br label %for.body4.lr.ph + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.cond.cleanup3 ] + ret i32 
%Sum.0.lcssa + +for.body4.lr.ph: ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3 + %j.022 = phi i32 [ %inc6, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ] + %Sum.021 = phi i32 [ %add, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ] + br label %for.body4 + +for.cond.cleanup3: ; preds = %for.body4 + %inc6 = add nuw nsw i32 %j.022, 1 + %exitcond24 = icmp eq i32 %inc6, %lim + br i1 %exitcond24, label %for.cond.cleanup, label %for.body4.lr.ph + +for.body4: ; preds = %for.body4, %for.body4.lr.ph + %i.019 = phi i32 [ %j.022, %for.body4.lr.ph ], [ %inc, %for.body4 ] + %Sum.118 = phi i32 [ %Sum.021, %for.body4.lr.ph ], [ %add, %for.body4 ] + %call = tail call signext i32 %Func(i32 signext %i.019) + %add = add nsw i32 %call, %Sum.118 + %inc = add nuw nsw i32 %i.019, 1 + %exitcond = icmp eq i32 %inc, %lim + br i1 %exitcond, label %for.cond.cleanup3, label %for.body4 +} + +; Test hoist out of if statement with low branch probability +; FIXME: we shouldn't hoist in such cases as it could increase the number +; of stores after hoisting. 
+define signext i32 @test3(i32 signext %lim, i32 (i32)* nocapture %Func) { +entry: +; CHECK-LABEL: test3 +; CHECK: %for.body.lr.ph +; CHECK: std 2, 24(1) +; CHECK: %for.body +; CHECK-NOT: std 2, 24(1) + %cmp13 = icmp sgt i32 %lim, 0 + br i1 %cmp13, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %sub = add nsw i32 %lim, -1 + br label %for.body + +for.cond.cleanup: ; preds = %if.end, %entry + %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %if.end ] + ret i32 %Sum.0.lcssa + +for.body: ; preds = %if.end, %for.body.lr.ph + %i.015 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ] + %Sum.014 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %if.end ] + %cmp1 = icmp eq i32 %i.015, %sub + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %call = tail call signext i32 %Func(i32 signext %sub) + %add = add nsw i32 %call, %Sum.014 + br label %if.end + +if.end: ; preds = %if.then, %for.body + %Sum.1 = phi i32 [ %add, %if.then ], [ %Sum.014, %for.body ] + %call2 = tail call signext i32 @func(i32 signext %i.015) + %add3 = add nsw i32 %call2, %Sum.1 + %inc = add nuw nsw i32 %i.015, 1 + %exitcond = icmp eq i32 %inc, %lim + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +declare signext i32 @func(i32 signext)