Index: llvm/lib/CodeGen/MachineLICM.cpp
===================================================================
--- llvm/lib/CodeGen/MachineLICM.cpp
+++ llvm/lib/CodeGen/MachineLICM.cpp
@@ -215,9 +215,9 @@
     void AddToLiveIns(MCRegister Reg);
 
-    bool IsLICMCandidate(MachineInstr &I);
+    bool IsLICMCandidate(MachineInstr &I, bool DontMoveAcrossStore = true);
 
-    bool IsLoopInvariantInst(MachineInstr &I);
+    bool IsLoopInvariantInst(MachineInstr &I, bool DontMoveAcrossStore = true);
 
     bool HasLoopPHIUse(const MachineInstr *MI) const;
@@ -787,6 +787,20 @@
   }
 }
 
+/// Return true if this instruction, or any instruction after it in its basic
+/// block, is a call or may store to memory.
+static bool HasSuccessiveStoreInst(MachineInstr &I) {
+  auto End = I.getParent()->instr_end();
+  for (auto It = I.getIterator(); It != End; ++It) {
+    if (It->isCall() || It->mayStore()) {
+      LLVM_DEBUG(dbgs() << "LICM: found call/store instruction.\n");
+      return true;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "LICM: No call/store instruction found.\n");
+  return false;
+}
+
 /// Sink instructions into loops if profitable. This especially tries to prevent
 /// register spills caused by register pressure if there is little to no
 /// overhead moving instructions into loops.
@@ -801,7 +813,9 @@
     // We need to ensure that we can safely move this instruction into the loop.
     // As such, it must not have side-effects, e.g. such as a call has.
     LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I);
-    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) {
+
+    if (IsLoopInvariantInst(*I, HasSuccessiveStoreInst(*I)) &&
+        !HasLoopPHIUse(&*I)) {
       LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n");
       Candidates.push_back(&*I);
       continue;
@@ -809,7 +823,11 @@
     LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n");
   }
 
-  for (MachineInstr *I : Candidates) {
+  // Walk the candidates in reverse order so that we start with the use
+  // of a def-use chain, if there is any.
+  // TODO: Implement this in a better way and don't rely on any ordering.
+  for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+    MachineInstr *I = *It;
     const MachineOperand &MO = I->getOperand(0);
     if (!MO.isDef() || !MO.isReg() || !MO.getReg())
       continue;
@@ -1047,9 +1065,8 @@
 /// Returns true if the instruction may be a suitable candidate for LICM.
 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
-bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, bool DontMoveAcrossStore) {
   // Check if it's safe to move the instruction.
-  bool DontMoveAcrossStore = true;
   if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
       !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
     LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
     return false;
@@ -1082,8 +1099,9 @@
 /// I.e., all virtual register operands are defined outside of the loop,
 /// physical registers aren't accessed explicitly, and there are no side
 /// effects that aren't captured by the operands or other flags.
-bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
-  if (!IsLICMCandidate(I)) {
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I,
+                                          bool DontMoveAcrossStore) {
+  if (!IsLICMCandidate(I, DontMoveAcrossStore)) {
     LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
     return false;
   }
Index: llvm/test/CodeGen/AArch64/machine-licm-sink-instr.mir
===================================================================
--- llvm/test/CodeGen/AArch64/machine-licm-sink-instr.mir
+++ llvm/test/CodeGen/AArch64/machine-licm-sink-instr.mir
@@ -143,7 +143,6 @@
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK: successors: %bb.3(0x80000000)
   ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
-  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK: B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
@@ -153,6 +152,7 @@
   ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
   ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
   ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRWui]]
   ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK: $w0 = COPY [[COPY1]]