Index: llvm/include/llvm/CodeGen/MachineLoopInfo.h =================================================================== --- llvm/include/llvm/CodeGen/MachineLoopInfo.h +++ llvm/include/llvm/CodeGen/MachineLoopInfo.h @@ -71,7 +71,10 @@ /// I.e., all virtual register operands are defined outside of the loop, /// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. - bool isLoopInvariant(MachineInstr &I) const; + bool isLoopInvariant(MachineInstr &MI) const; + + /// Return true if the specified instruction is used by a phi node. + bool hasLoopPHIUse(const MachineInstr *MI) const; void dump() const; Index: llvm/lib/CodeGen/MachineLICM.cpp =================================================================== --- llvm/lib/CodeGen/MachineLICM.cpp +++ llvm/lib/CodeGen/MachineLICM.cpp @@ -214,8 +214,6 @@ bool IsLoopInvariantInst(MachineInstr &I); - bool HasLoopPHIUse(const MachineInstr *MI) const; - bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg) const; @@ -995,41 +993,6 @@ return CurLoop->isLoopInvariant(I); } -/// Return true if the specified instruction is used by a phi node and hoisting -/// it could cause a copy to be inserted. -bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { - SmallVector Work(1, MI); - do { - MI = Work.pop_back_val(); - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { - // A PHI may cause a copy to be inserted. - if (UseMI.isPHI()) { - // A PHI inside the loop causes a copy because the live range of Reg is - // extended across the PHI. - if (CurLoop->contains(&UseMI)) - return true; - // A PHI in an exit block can cause a copy to be inserted if the PHI - // has multiple predecessors in the loop with different values. 
- // For now, approximate by rejecting all exit blocks. - if (isExitBlock(UseMI.getParent())) - return true; - continue; - } - // Look past copies as well. - if (UseMI.isCopy() && CurLoop->contains(&UseMI)) - Work.push_back(&UseMI); - } - } - } while (!Work.empty()); - return false; -} - /// Compute operand latency between a def of 'Reg' and an use in the current /// loop, return true if the target considered it high. bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, @@ -1148,7 +1111,7 @@ return true; bool CheapInstr = IsCheapInstruction(MI); - bool CreatesCopy = HasLoopPHIUse(&MI); + bool CreatesCopy = CurLoop->hasLoopPHIUse(&MI); // Don't hoist a cheap instruction if it would create a copy in the loop. if (CheapInstr && CreatesCopy) { Index: llvm/lib/CodeGen/MachineLoopInfo.cpp =================================================================== --- llvm/lib/CodeGen/MachineLoopInfo.cpp +++ llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -149,13 +149,13 @@ return Preheader; } -bool MachineLoop::isLoopInvariant(MachineInstr &I) const { - MachineFunction *MF = I.getParent()->getParent(); +bool MachineLoop::isLoopInvariant(MachineInstr &MI) const { + MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // The instruction is loop invariant if all of its operands are. - for (const MachineOperand &MO : I.operands()) { + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -172,7 +172,7 @@ // However, if the physreg is known to always be caller saved/restored // then this use is safe to hoist. if (!MRI->isConstantPhysReg(Reg) && - !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF()))) + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))) return false; // Otherwise it's safe to move. 
continue; @@ -202,6 +202,45 @@ return true; } +/// Return true if the specified instruction is used by a phi node and hoisting +/// it could cause a copy to be inserted. +bool MachineLoop::hasLoopPHIUse(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + SmallVector<MachineBasicBlock *, 8> ExitBlocks; + getExitBlocks(ExitBlocks); + SmallVector<const MachineInstr *, 8> Work(1, MI); + do { + MI = Work.pop_back_val(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { + // A PHI may cause a copy to be inserted. + if (UseMI.isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (contains(&UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (is_contained(ExitBlocks, UseMI.getParent())) + return true; + continue; + } + // Look past copies as well. 
+ if (UseMI.isCopy() && contains(&UseMI)) + Work.push_back(&UseMI); + } + } + } while (!Work.empty()); + return false; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineLoop::dump() const { print(dbgs()); Index: llvm/lib/CodeGen/MachineSink.cpp =================================================================== --- llvm/lib/CodeGen/MachineSink.cpp +++ llvm/lib/CodeGen/MachineSink.cpp @@ -227,6 +227,12 @@ void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates); bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + bool IsSafeToMove(MachineLoop *L, MachineInstr &I, + MachineBasicBlock *SinkTo); + bool AreAliased(MachineInstr &First, MachineInstr &Second, + MachineBasicBlock *From, MachineBasicBlock *To, + DenseSet<MachineBasicBlock *> HandledDomBlocks, + bool &SawStore, bool &HasAliasedStore); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -352,24 +358,6 @@ return true; } -/// Return true if this machine instruction loads from global offset table or -/// constant pool. -static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { - assert(MI.mayLoad() && "Expected MI that loads!"); - - // If we lost memory operands, conservatively assume that the instruction - // reads from everything.. - if (MI.memoperands_empty()) - return true; - - for (MachineMemOperand *MemOp : MI.memoperands()) - if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) - if (PSV->isGOT() || PSV->isConstantPool()) - return true; - - return false; -} - void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates) { for (auto &MI : *BB) { @@ -379,27 +367,28 @@ "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { + // If physical registers are used, then this is marked as not loop + // invariant. 
This can be the case if the preheader is the entry block, and + // when there are copy instructions of function arguments that are passed + // through registers. + if (!L->isLoopInvariant(MI) || L->hasLoopPHIUse(&MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); continue; } - bool DontMoveAcrossStore = true; - if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); - continue; - } - if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); - continue; - } if (MI.isConvergent()) continue; + // Skip instruction that don't produce values, like branches and certain + // store instructions (that e.g. don't post-increment). const MachineOperand &MO = MI.getOperand(0); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not define a value.\n"); continue; - if (!MRI->hasOneDef(MO.getReg())) + } + if (!MRI->hasOneDef(MO.getReg())) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not have 1 def.\n"); continue; + } LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); @@ -470,8 +459,13 @@ // of a def-use chain, if there is any. for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) { MachineInstr *I = *It; + + // TODO: This is conservative because we bail as soon as we find one + // instruction that cannot be sunk. Better is to do this per def-use + // chain, so we try a next chain if one fails. if (!SinkIntoLoop(L, *I)) break; + EverMadeChange = true; ++NumLoopSunk; } @@ -1155,29 +1149,10 @@ } for (MachineInstr &I : *BB) { - // Treat as alias conservatively for a call or an ordered memory - // operation. 
- if (I.isCall() || I.hasOrderedMemoryRef()) { - for (auto *DomBB : HandledDomBlocks) { - if (DomBB != BB && DT->dominates(DomBB, BB)) - HasStoreCache[std::make_pair(DomBB, To)] = true; - else if(DomBB != BB && DT->dominates(BB, DomBB)) - HasStoreCache[std::make_pair(From, DomBB)] = true; - } - HasStoreCache[BlockPair] = true; + bool Aliased = AreAliased(I, MI, From, To, HandledDomBlocks, SawStore, + HasAliasedStore); + if (Aliased && (I.isCall() || I.hasOrderedMemoryRef())) return true; - } - - if (I.mayStore()) { - SawStore = true; - // We still have chance to sink MI if all stores between are not - // aliased to MI. - // Cache all store instructions, so that we don't need to go through - // all From reachable blocks for next load instruction. - if (I.mayAlias(AA, MI, false)) - HasAliasedStore = true; - StoreInstrCache[BlockPair].push_back(&I); - } } } } @@ -1187,6 +1162,87 @@ return HasAliasedStore; } +bool MachineSinking::AreAliased(MachineInstr &First, MachineInstr &Second, + MachineBasicBlock *From, MachineBasicBlock *To, + DenseSet<MachineBasicBlock *> HandledDomBlocks, bool &SawStore, + bool &HasAliasedStore) { + MachineBasicBlock *BB = First.getParent(); + auto BlockPair = std::make_pair(From, To); + // Treat a call or an ordered memory operation conservatively as aliased. + if (First.isCall() || First.hasOrderedMemoryRef()) { + for (auto *DomBB : HandledDomBlocks) { + if (DomBB != BB && DT->dominates(DomBB, BB)) + HasStoreCache[std::make_pair(DomBB, To)] = true; + else if (DomBB != BB && DT->dominates(BB, DomBB)) + HasStoreCache[std::make_pair(From, DomBB)] = true; + } + HasStoreCache[BlockPair] = true; + return true; + } + + if (First.mayStore()) { + SawStore = true; + // We still have a chance to sink Second if all stores in between are + // not aliased to it. + // Cache all store instructions, so that we don't need to go through + // all From reachable blocks for next load instruction. + if (First.mayAlias(AA, Second, false)) + HasAliasedStore = true; + StoreInstrCache[BlockPair].push_back(&First); + } + + // Do not cache a "no store" result here: this helper runs once per 
+ // instruction, so stores later on the path have not been visited yet and a + // cached 'false' would be premature. + return HasAliasedStore; +} + +bool MachineSinking::IsSafeToMove(MachineLoop *L, MachineInstr &I, + MachineBasicBlock *SinkTo) { + if (LI->getLoopFor(SinkTo) != L) + return false; + + auto End = I.getParent()->instr_end(); + auto It = I.getIterator(); + + // 1) First, analyse all instructions from the current instruction I to the + // end of its block. + bool HasAliasedStore = false; + bool SawStore = false; + ++It; + for (; It != End; ++It) { + if (AreAliased(*It, I, I.getParent(), SinkTo, {}, SawStore, + HasAliasedStore)) { + LLVM_DEBUG(dbgs() << "LoopSink: Alias pair found!\n"); + return false; + } + LLVM_DEBUG(dbgs() << "LoopSink: Not aliased with : " << *It); + } + + // This isSafeToMove check is not doing any alias analysis, but checks + // different instruction types, side-effects, etc. 'SawStore' is reset before + // the call, so the value recorded in 1) is not passed on; an aliasing pair + // found in 1) has already been rejected there, and 2) checks the loop blocks. + SawStore = false; + if (!I.isSafeToMove(AA, SawStore)) { + LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move!\n"); + return false; + } + + // 2) Next, check all instructions in the loop to see if there are aliases. + for (auto *BB : L->blocks()) { + for (auto &CurI : *BB) { + if (AreAliased(CurI, I, I.getParent(), SinkTo, {}, SawStore, HasAliasedStore)) { + LLVM_DEBUG(dbgs() << "LoopSink: Alias found in loop block: " << CurI); + return false; + } + LLVM_DEBUG(dbgs() << "LoopSink: Not aliased with loop ins: " << CurI); + } + } + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not aliased, safe to move!\n"); + return true; +} + /// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. 
@@ -1209,12 +1265,7 @@ // FIXME: Come up with a proper cost model that estimates whether sinking // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it. - if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); - CanSink = false; - break; - } + if (!SinkBlock) { SinkBlock = MI.getParent(); LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " @@ -1243,6 +1294,10 @@ LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); return false; } + if (!IsSafeToMove(L, I, SinkBlock)) { + LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n"); + return false; + } LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); Index: llvm/test/CodeGen/AArch64/loop-sink.mir =================================================================== --- llvm/test/CodeGen/AArch64/loop-sink.mir +++ llvm/test/CodeGen/AArch64/loop-sink.mir @@ -1,11 +1,13 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills %s -o - 2>&1 | FileCheck %s +# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills -verify-machineinstrs %s -o - 2>&1 | FileCheck %s + --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" @A = external dso_local global [100 x i32], align 4 %struct.A = type { i32, i32, i32, i32, i32, i32 } + @G = external dso_local local_unnamed_addr global i32, align 4 define void @cant_sink_adds_call_in_block(i8* nocapture readonly %input, %struct.A* %a) { %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1 @@ -129,7 +131,7 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } - define i32 @use_is_not_a_copy(i32 %n) { + define i32 @do_sink_use_is_not_a_copy(i32 %n) 
{ entry: %cmp63 = icmp sgt i32 %n, 0 br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup @@ -151,7 +153,7 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } - define dso_local void @sink_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 { + define dso_local void @cant_sink_load_add_chain_loop_phi_use(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 { entry: %0 = load i32, i32* %read, align 4, !tbaa !6 %cmp10 = icmp sgt i32 %n, 0 @@ -177,63 +179,169 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } - define dso_local void @store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 { + define dso_local void @cant_sink_multi_block_loop_with_call(i32* noalias nocapture %read, i32* noalias nocapture %write, i32* nocapture readnone %store, i32 %n) local_unnamed_addr #0 { entry: - %0 = load i32, i32* %read, align 4, !tbaa !6 - %cmp10 = icmp sgt i32 %n, 0 - br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup + %0 = load i32, i32* %read, align 4 + store i32 %n, i32* %read, align 4 + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body.lr.ph, label %for.cond.cleanup + + for.body.lr.ph: ; preds = %entry + %1 = load i32, i32* @G, align 4 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %for.body.us.preheader, label %for.body.preheader + + for.body.preheader: ; preds = %for.body.lr.ph + %3 = add i32 %0, 42 + br label %for.body + + for.body.us.preheader: ; preds = %for.body.lr.ph + %4 = add i32 %n, -1 + %5 = add i32 %0, 42 + br label %for.body.us + + for.body.us: ; preds = %for.body.us.preheader, %for.inc.us.for.body.us_crit_edge + %lsr.iv2 = phi i32 [ %5, %for.body.us.preheader ], [ %lsr.iv.next3, %for.inc.us.for.body.us_crit_edge ] + %lsr.iv = phi i32 [ %4, %for.body.us.preheader ], [ %lsr.iv.next, %for.inc.us.for.body.us_crit_edge ] + 
%6 = phi i32 [ %.pre, %for.inc.us.for.body.us_crit_edge ], [ 0, %for.body.us.preheader ] + %sum.013.us = phi i32 [ %sum.1.us, %for.inc.us.for.body.us_crit_edge ], [ %n, %for.body.us.preheader ] + %tobool.not.us = icmp eq i32 %6, 0 + br i1 %tobool.not.us, label %if.else.us, label %if.then.us + + if.then.us: ; preds = %for.body.us + %div.us = sdiv i32 %sum.013.us, %lsr.iv2 + br label %for.inc.us + + if.else.us: ; preds = %for.body.us + tail call void @H() #2 + br label %for.inc.us + + for.inc.us: ; preds = %if.else.us, %if.then.us + %sum.1.us = phi i32 [ %div.us, %if.then.us ], [ %sum.013.us, %if.else.us ] + %exitcond.not = icmp eq i32 %lsr.iv, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.inc.us.for.body.us_crit_edge, !llvm.loop !10 + + for.inc.us.for.body.us_crit_edge: ; preds = %for.inc.us + %.pre = load i32, i32* @G, align 4 + %lsr.iv.next = add i32 %lsr.iv, -1 + %lsr.iv.next3 = add i32 %lsr.iv2, 1 + br label %for.body.us + + for.cond.cleanup: ; preds = %for.body, %for.inc.us, %entry + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %sum.1.us, %for.inc.us ], [ %div, %for.body ] + store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6 + ret void + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv6 = phi i32 [ %3, %for.body.preheader ], [ %lsr.iv.next7, %for.body ] + %lsr.iv4 = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next5, %for.body ] + %sum.013 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %div = sdiv i32 %sum.013, %lsr.iv6 + %lsr.iv.next5 = add i32 %lsr.iv4, -1 + %lsr.iv.next7 = add i32 %lsr.iv6, 1 + %exitcond17.not = icmp eq i32 %lsr.iv.next5, 0 + br i1 %exitcond17.not, label %for.cond.cleanup, label %for.body + } + + define dso_local void @do_sink_load_add(float* noalias nocapture readonly %read, float* noalias nocapture %write, float* nocapture readnone %store, i32 %n) local_unnamed_addr #0 { + entry: + %0 = load float, float* %read, align 4, !tbaa !6 + %add = fadd fast float %0, 4.200000e+01 + %cmp8 = icmp sgt i32 
%n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry - %1 = add i32 %0, 42 - store i32 43, i32* %store, align 4, !tbaa !6 br label %for.body for.cond.cleanup: ; preds = %for.body, %entry - %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] - store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6 + %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ] + store float %sum.0.lcssa, float* %write, align 4 ret void for.body: ; preds = %for.body.preheader, %for.body - %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ] %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] - %sum.011 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] - %div = sdiv i32 %sum.011, %lsr.iv1 + %sum.09 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %add1 = fadd fast float %add, %sum.09 %lsr.iv.next = add i32 %lsr.iv, -1 - %lsr.iv.next2 = add i32 %lsr.iv1, 1 %exitcond.not = icmp eq i32 %lsr.iv.next, 0 - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } - define dso_local void @aliased_store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 { + define dso_local void @do_sink_no_aliased_store(float* noalias nocapture readonly %read, float* noalias nocapture %write, float* nocapture %store, i32 %n, float %m) local_unnamed_addr #0 { entry: - %0 = load i32, i32* %read, align 4, !tbaa !6 - %cmp10 = icmp sgt i32 %n, 0 - br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry - %1 = add i32 %0, 42 - store i32 43, i32* %read, align 4, !tbaa !6 + %0 = load float, float* %read, align 4 + store float %m, float* %store, align 4 br label 
%for.body for.cond.cleanup: ; preds = %for.body, %entry - %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] - store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6 + %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + store float %sum.0.lcssa, float* %write, align 4 ret void for.body: ; preds = %for.body.preheader, %for.body - %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ] %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] - %sum.011 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] - %div = sdiv i32 %sum.011, %lsr.iv1 + %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %add = fadd fast float %sum.07, %0 %lsr.iv.next = add i32 %lsr.iv, -1 - %lsr.iv.next2 = add i32 %lsr.iv1, 1 %exitcond.not = icmp eq i32 %lsr.iv.next, 0 - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } + define dso_local void @cant_sink_load_aliased_store(float* nocapture readonly %read, float* nocapture %write, float* nocapture %store, i32 %n, float %m) local_unnamed_addr #0 { + entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %0 = load float, float* %read, align 4 + store float %m, float* %store, align 4 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + store float %sum.0.lcssa, float* %write, align 4 + ret void + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %add = fadd fast float %sum.07, %0 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label 
%for.body + } + + define dso_local void @cant_sink_aliased_store_in_loop(float* nocapture readonly %read, float* nocapture %write, float* nocapture %store, i32 %n, float %m) local_unnamed_addr #0 { + entry: + %0 = load float, float* %read, align 4, !tbaa !6 + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + store float %sum.0.lcssa, float* %write, align 4 + ret void + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %add = fadd fast float %sum.07, %0 + store float %m, float* %store, align 4 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } declare i32 @use(i32) declare void @_Z6assignPj(i32*) + declare void @H() !6 = !{!7, !7, i64 0} !7 = !{!"int", !8, i64 0} @@ -871,7 +979,7 @@ ... 
--- -name: use_is_not_a_copy +name: do_sink_use_is_not_a_copy alignment: 4 exposesReturnsTwice: false legalized: false @@ -921,7 +1029,7 @@ constants: [] machineFunctionInfo: {} body: | - ; CHECK-LABEL: name: use_is_not_a_copy + ; CHECK-LABEL: name: do_sink_use_is_not_a_copy ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) ; CHECK: liveins: $w0 @@ -931,8 +1039,6 @@ ; CHECK: B %bb.1 ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 @@ -942,6 +1048,8 @@ ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3 ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3 + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]] ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]] ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv @@ -983,7 +1091,7 @@ ... 
--- -name: sink_add +name: cant_sink_load_add_chain_loop_phi_use alignment: 16 exposesReturnsTwice: false legalized: false @@ -1041,7 +1149,7 @@ constants: [] machineFunctionInfo: {} body: | - ; CHECK-LABEL: name: sink_add + ; CHECK-LABEL: name: cant_sink_load_add_chain_loop_phi_use ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) ; CHECK: liveins: $x0, $x1, $w2 @@ -1115,7 +1223,7 @@ ... --- -name: store_after_add +name: cant_sink_multi_block_loop_with_call alignment: 16 exposesReturnsTwice: false legalized: false @@ -1127,29 +1235,286 @@ registers: - { id: 0, class: gpr32sp, preferred-register: '' } - { id: 1, class: gpr32all, preferred-register: '' } - - { id: 2, class: gpr32, preferred-register: '' } - - { id: 3, class: gpr32common, preferred-register: '' } - - { id: 4, class: gpr32sp, preferred-register: '' } - - { id: 5, class: gpr32, preferred-register: '' } - - { id: 6, class: gpr32all, preferred-register: '' } - - { id: 7, class: gpr32all, preferred-register: '' } + - { id: 2, class: gpr32all, preferred-register: '' } + - { id: 3, class: gpr32all, preferred-register: '' } + - { id: 4, class: gpr32common, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: gpr32, preferred-register: '' } - { id: 8, class: gpr32all, preferred-register: '' } - - { id: 9, class: gpr64common, preferred-register: '' } - - { id: 10, class: gpr64common, preferred-register: '' } - - { id: 11, class: gpr64common, preferred-register: '' } - - { id: 12, class: gpr32common, preferred-register: '' } - - { id: 13, class: gpr32common, preferred-register: '' } + - { id: 9, class: gpr32all, preferred-register: '' } + - { id: 10, class: gpr32all, preferred-register: '' } + - { id: 11, class: gpr32all, preferred-register: '' } + - { id: 12, class: gpr32all, preferred-register: '' } + - { id: 13, class: gpr32, preferred-register: '' } + - { id: 14, class: 
gpr32common, preferred-register: '' } + - { id: 15, class: gpr32sp, preferred-register: '' } + - { id: 16, class: gpr32, preferred-register: '' } + - { id: 17, class: gpr32all, preferred-register: '' } + - { id: 18, class: gpr32all, preferred-register: '' } + - { id: 19, class: gpr32all, preferred-register: '' } + - { id: 20, class: gpr64common, preferred-register: '' } + - { id: 21, class: gpr64common, preferred-register: '' } + - { id: 22, class: gpr64, preferred-register: '' } + - { id: 23, class: gpr32common, preferred-register: '' } + - { id: 24, class: gpr32common, preferred-register: '' } + - { id: 25, class: gpr32, preferred-register: '' } + - { id: 26, class: gpr64common, preferred-register: '' } + - { id: 27, class: gpr32, preferred-register: '' } + - { id: 28, class: gpr32sp, preferred-register: '' } + - { id: 29, class: gpr32, preferred-register: '' } + - { id: 30, class: gpr32, preferred-register: '' } + - { id: 31, class: gpr32sp, preferred-register: '' } + - { id: 32, class: gpr32all, preferred-register: '' } + - { id: 33, class: gpr32, preferred-register: '' } + - { id: 34, class: gpr32sp, preferred-register: '' } + - { id: 35, class: gpr32all, preferred-register: '' } + - { id: 36, class: gpr32, preferred-register: '' } + - { id: 37, class: gpr64common, preferred-register: '' } + - { id: 38, class: gpr32, preferred-register: '' } + - { id: 39, class: gpr32, preferred-register: '' } + - { id: 40, class: gpr32sp, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%20' } + - { reg: '$x1', virtual-reg: '%21' } + - { reg: '$w3', virtual-reg: '%23' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + 
savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cant_sink_multi_block_loop_with_call + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.9(0x30000000) + ; CHECK: liveins: $x0, $x1, $w3 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w3 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: STRWui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.read, !tbaa !0) + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.9, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1.for.body.lr.ph: + ; CHECK: successors: %bb.3(0x30000000), %bb.2(0x50000000) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @G + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !0) + ; CHECK: CBZW killed [[LDRWui1]], %bb.3 + ; CHECK: B %bb.2 + ; CHECK: bb.2.for.body.preheader: + ; CHECK: successors: %bb.10(0x80000000) + ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 + ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]] + ; CHECK: B %bb.10 + ; CHECK: bb.3.for.body.us.preheader: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBSWri]] + ; CHECK: [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 + ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY $wzr + ; CHECK: [[COPY6:%[0-9]+]]:gpr32all = COPY [[COPY5]] + ; CHECK: [[COPY7:%[0-9]+]]:gpr32all = COPY [[ADDWri1]] + ; CHECK: bb.4.for.body.us: + ; CHECK: successors: %bb.6(0x30000000), %bb.5(0x50000000) + ; CHECK: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY7]], %bb.3, %12, %bb.8 + ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY4]], %bb.3, %11, 
%bb.8 + ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY6]], %bb.3, %10, %bb.8 + ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.3, %9, %bb.8 + ; CHECK: CBZW [[PHI2]], %bb.6 + ; CHECK: B %bb.5 + ; CHECK: bb.5.if.then.us: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI]] + ; CHECK: [[COPY8:%[0-9]+]]:gpr32all = COPY [[SDIVWr]] + ; CHECK: B %bb.7 + ; CHECK: bb.6.if.else.us: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: BL @H, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: bb.7.for.inc.us: + ; CHECK: successors: %bb.9(0x04000000), %bb.8(0x7c000000) + ; CHECK: [[PHI4:%[0-9]+]]:gpr32all = PHI [[COPY8]], %bb.5, [[PHI3]], %bb.6 + ; CHECK: CBZW [[PHI1]], %bb.9 + ; CHECK: B %bb.8 + ; CHECK: bb.8.for.inc.us.for.body.us_crit_edge: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[LDRWui2:%[0-9]+]]:gpr32 = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !0) + ; CHECK: [[COPY9:%[0-9]+]]:gpr32all = COPY [[LDRWui2]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def dead $nzcv + ; CHECK: [[COPY10:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: [[ADDWri2:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0 + ; CHECK: [[COPY11:%[0-9]+]]:gpr32all = COPY [[ADDWri2]] + ; CHECK: B %bb.4 + ; CHECK: bb.9.for.cond.cleanup: + ; CHECK: [[PHI5:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %17, %bb.10, [[PHI4]], %bb.7 + ; CHECK: STRWui [[PHI5]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: RET_ReallyLR + ; CHECK: bb.10.for.body: + ; CHECK: successors: %bb.9(0x04000000), %bb.10(0x7c000000) + ; CHECK: [[PHI6:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.2, %19, %bb.10 + ; CHECK: [[PHI7:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.2, %18, %bb.10 + ; CHECK: [[PHI8:%[0-9]+]]:gpr32 = PHI 
[[COPY]], %bb.2, %17, %bb.10 + ; CHECK: [[SDIVWr1:%[0-9]+]]:gpr32 = SDIVWr [[PHI8]], [[PHI6]] + ; CHECK: [[COPY12:%[0-9]+]]:gpr32all = COPY [[SDIVWr1]] + ; CHECK: [[SUBSWri2:%[0-9]+]]:gpr32 = SUBSWri [[PHI7]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY13:%[0-9]+]]:gpr32all = COPY [[SUBSWri2]] + ; CHECK: [[ADDWri3:%[0-9]+]]:gpr32sp = ADDWri [[PHI6]], 1, 0 + ; CHECK: [[COPY14:%[0-9]+]]:gpr32all = COPY [[ADDWri3]] + ; CHECK: Bcc 0, %bb.9, implicit $nzcv + ; CHECK: B %bb.10 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.9(0x30000000) + liveins: $x0, $x1, $w3 + + %23:gpr32common = COPY $w3 + %21:gpr64common = COPY $x1 + %20:gpr64common = COPY $x0 + %24:gpr32common = LDRWui %20, 0 :: (load 4 from %ir.read, !tbaa !6) + STRWui %23, %20, 0 :: (store 4 into %ir.read, !tbaa !6) + %25:gpr32 = SUBSWri %23, 1, 0, implicit-def $nzcv + Bcc 11, %bb.9, implicit $nzcv + B %bb.1 + + bb.1.for.body.lr.ph: + successors: %bb.3(0x30000000), %bb.2(0x50000000) + + %26:gpr64common = ADRP target-flags(aarch64-page) @G + %27:gpr32 = LDRWui %26, target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !6) + CBZW killed %27, %bb.3 + B %bb.2 + + bb.2.for.body.preheader: + successors: %bb.10(0x80000000) + + %28:gpr32sp = ADDWri %24, 42, 0 + %1:gpr32all = COPY %28 + B %bb.10 + + bb.3.for.body.us.preheader: + successors: %bb.4(0x80000000) + + %2:gpr32all = COPY %25 + %34:gpr32sp = ADDWri %24, 42, 0 + %35:gpr32all = COPY $wzr + %32:gpr32all = COPY %35 + %3:gpr32all = COPY %34 + + bb.4.for.body.us: + successors: %bb.6(0x30000000), %bb.5(0x50000000) + + %4:gpr32common = PHI %3, %bb.3, %12, %bb.8 + %5:gpr32common = PHI %2, %bb.3, %11, %bb.8 + %6:gpr32 = PHI %32, %bb.3, %10, %bb.8 + %7:gpr32 = PHI %23, %bb.3, %9, %bb.8 + CBZW %6, %bb.6 + B %bb.5 + + bb.5.if.then.us: + successors: %bb.7(0x80000000) + + %36:gpr32 = SDIVWr %7, %4 + %8:gpr32all = COPY %36 + B %bb.7 + + bb.6.if.else.us: + successors: %bb.7(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit 
$sp + BL @H, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.7.for.inc.us: + successors: %bb.9(0x04000000), %bb.8(0x7c000000) + + %9:gpr32all = PHI %8, %bb.5, %7, %bb.6 + CBZW %5, %bb.9 + B %bb.8 + + bb.8.for.inc.us.for.body.us_crit_edge: + successors: %bb.4(0x80000000) + + %38:gpr32 = LDRWui %26, target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !6) + %10:gpr32all = COPY %38 + %39:gpr32 = SUBSWri %5, 1, 0, implicit-def dead $nzcv + %11:gpr32all = COPY %39 + %40:gpr32sp = ADDWri %4, 1, 0 + %12:gpr32all = COPY %40 + B %bb.4 + + bb.9.for.cond.cleanup: + %13:gpr32 = PHI %23, %bb.0, %17, %bb.10, %9, %bb.7 + STRWui %13, %21, 0 :: (store 4 into %ir.write, !tbaa !6) + RET_ReallyLR + + bb.10.for.body: + successors: %bb.9(0x04000000), %bb.10(0x7c000000) + + %14:gpr32common = PHI %1, %bb.2, %19, %bb.10 + %15:gpr32sp = PHI %23, %bb.2, %18, %bb.10 + %16:gpr32 = PHI %23, %bb.2, %17, %bb.10 + %29:gpr32 = SDIVWr %16, %14 + %17:gpr32all = COPY %29 + %30:gpr32 = SUBSWri %15, 1, 0, implicit-def $nzcv + %18:gpr32all = COPY %30 + %31:gpr32sp = ADDWri %14, 1, 0 + %19:gpr32all = COPY %31 + Bcc 0, %bb.9, implicit $nzcv + B %bb.10 + +... 
+--- +name: do_sink_load_add +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr64common, preferred-register: '' } + - { id: 7, class: gpr64common, preferred-register: '' } + - { id: 8, class: gpr64, preferred-register: '' } + - { id: 9, class: gpr32common, preferred-register: '' } + - { id: 10, class: fpr32, preferred-register: '' } + - { id: 11, class: fpr32, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } + - { id: 13, class: fpr32, preferred-register: '' } - { id: 14, class: gpr32, preferred-register: '' } - - { id: 15, class: gpr32, preferred-register: '' } - - { id: 16, class: gpr32sp, preferred-register: '' } - - { id: 17, class: gpr32, preferred-register: '' } - - { id: 18, class: gpr32, preferred-register: '' } - - { id: 19, class: gpr32sp, preferred-register: '' } + - { id: 15, class: fpr32, preferred-register: '' } + - { id: 16, class: gpr32, preferred-register: '' } liveins: - - { reg: '$x0', virtual-reg: '%9' } - - { reg: '$x1', virtual-reg: '%10' } - - { reg: '$x2', virtual-reg: '%11' } - - { reg: '$w3', virtual-reg: '%12' } + - { reg: '$x0', virtual-reg: '%6' } + - { reg: '$x1', virtual-reg: '%7' } + - { reg: '$w3', virtual-reg: '%9' } frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false @@ -1176,87 +1541,212 @@ constants: [] machineFunctionInfo: {} body: | - ; CHECK-LABEL: name: store_after_add + ; CHECK-LABEL: name: do_sink_load_add ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) - ; CHECK: liveins: $x0, $x1, $x2, $w3 
+ ; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK: liveins: $x0, $x1, $w3 ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w3 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK: [[COPY3:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0 ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv - ; CHECK: Bcc 11, %bb.2, implicit $nzcv - ; CHECK: B %bb.1 + ; CHECK: Bcc 10, %bb.1, implicit $nzcv + ; CHECK: bb.4: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.2 ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load 4 from %ir.read, !tbaa !0) - ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]] - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 43 - ; CHECK: STRWui killed [[MOVi32imm]], [[COPY1]], 0 :: (store 4 into %ir.store, !tbaa !0) + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: - ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3 - ; CHECK: STRWui [[PHI]], [[COPY2]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3 + ; CHECK: STRSui [[PHI]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0) ; CHECK: RET_ReallyLR ; CHECK: bb.3.for.body: ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) - ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY4]], %bb.1, %8, %bb.3 - ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3 - ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3 - ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]] - ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SDIVWr]] - ; CHECK: 
[[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv - ; CHECK: [[COPY6:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] - ; CHECK: [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0 - ; CHECK: [[COPY7:%[0-9]+]]:gpr32all = COPY [[ADDWri1]] + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3 + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1109917696 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]] + ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[LDRSui]], [[COPY3]] + ; CHECK: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[FADDSrr]], [[PHI2]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] ; CHECK: Bcc 0, %bb.2, implicit $nzcv ; CHECK: B %bb.3 bb.0.entry: successors: %bb.1(0x50000000), %bb.2(0x30000000) - liveins: $x0, $x1, $x2, $w3 + liveins: $x0, $x1, $w3 + + %9:gpr32common = COPY $w3 + %7:gpr64common = COPY $x1 + %6:gpr64common = COPY $x0 + %11:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6) + %12:gpr32 = MOVi32imm 1109917696 + %13:fpr32 = COPY %12 + %0:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed %11, killed %13 + %10:fpr32 = FMOVS0 + %14:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 - %12:gpr32common = COPY $w3 - %11:gpr64common = COPY $x2 - %10:gpr64common = COPY $x1 - %9:gpr64common = COPY $x0 - %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6) - %15:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv + bb.1.for.body.preheader: + successors: %bb.3(0x80000000) + + %15:fpr32 = FMOVS0 + B %bb.3 + + bb.2.for.cond.cleanup: + %1:fpr32 = PHI %10, %bb.0, %4, %bb.3 + STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6) + 
RET_ReallyLR + + bb.3.for.body: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + + %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3 + %3:fpr32 = PHI %15, %bb.1, %4, %bb.3 + %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %0, %3 + %16:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %16 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... +--- +name: do_sink_no_aliased_store +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr64common, preferred-register: '' } + - { id: 7, class: gpr64common, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr32common, preferred-register: '' } + - { id: 10, class: fpr32, preferred-register: '' } + - { id: 11, class: fpr32, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } + - { id: 13, class: fpr32, preferred-register: '' } + - { id: 14, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%6' } + - { reg: '$x1', virtual-reg: '%7' } + - { reg: '$x2', virtual-reg: '%8' } + - { reg: '$w3', virtual-reg: '%9' } + - { reg: '$s0', virtual-reg: '%10' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + 
restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: do_sink_no_aliased_store + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK: liveins: $x0, $x1, $x2, $w3, $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY $w3 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 10, %bb.1, implicit $nzcv + ; CHECK: bb.4: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.2 + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: STRSui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.store, !tbaa !0) + ; CHECK: B %bb.3 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3 + ; CHECK: STRSui [[PHI]], [[COPY3]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: RET_ReallyLR + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY1]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3 + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY4]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[PHI2]], [[LDRSui]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $x0, $x1, $x2, $w3, $s0 + + %10:fpr32 = COPY $s0 + 
%9:gpr32common = COPY $w3 + %8:gpr64common = COPY $x2 + %7:gpr64common = COPY $x1 + %6:gpr64common = COPY $x0 + %11:fpr32 = FMOVS0 + %12:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv Bcc 11, %bb.2, implicit $nzcv B %bb.1 bb.1.for.body.preheader: successors: %bb.3(0x80000000) - %16:gpr32sp = ADDWri %13, 42, 0 - %1:gpr32all = COPY %16 - %14:gpr32 = MOVi32imm 43 - STRWui killed %14, %11, 0 :: (store 4 into %ir.store, !tbaa !6) + %13:fpr32 = FMOVS0 + %0:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6) + STRSui %10, %8, 0 :: (store 4 into %ir.store, !tbaa !6) B %bb.3 bb.2.for.cond.cleanup: - %2:gpr32 = PHI %12, %bb.0, %6, %bb.3 - STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6) + %1:fpr32 = PHI %11, %bb.0, %4, %bb.3 + STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6) RET_ReallyLR bb.3.for.body: successors: %bb.2(0x04000000), %bb.3(0x7c000000) - %3:gpr32common = PHI %1, %bb.1, %8, %bb.3 - %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3 - %5:gpr32 = PHI %12, %bb.1, %6, %bb.3 - %17:gpr32 = SDIVWr %5, %3 - %6:gpr32all = COPY %17 - %18:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv - %7:gpr32all = COPY %18 - %19:gpr32sp = ADDWri %3, 1, 0 - %8:gpr32all = COPY %19 + %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3 + %3:fpr32 = PHI %13, %bb.1, %4, %bb.3 + %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %3, %0 + %14:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %14 Bcc 0, %bb.2, implicit $nzcv B %bb.3 ... 
--- -name: aliased_store_after_add +name: cant_sink_load_aliased_store alignment: 16 exposesReturnsTwice: false legalized: false @@ -1266,31 +1756,27 @@ tracksRegLiveness: true hasWinCFI: false registers: - - { id: 0, class: gpr32sp, preferred-register: '' } - - { id: 1, class: gpr32all, preferred-register: '' } - - { id: 2, class: gpr32, preferred-register: '' } - - { id: 3, class: gpr32common, preferred-register: '' } - - { id: 4, class: gpr32sp, preferred-register: '' } - - { id: 5, class: gpr32, preferred-register: '' } - - { id: 6, class: gpr32all, preferred-register: '' } - - { id: 7, class: gpr32all, preferred-register: '' } - - { id: 8, class: gpr32all, preferred-register: '' } - - { id: 9, class: gpr64common, preferred-register: '' } - - { id: 10, class: gpr64common, preferred-register: '' } - - { id: 11, class: gpr64common, preferred-register: '' } - - { id: 12, class: gpr32common, preferred-register: '' } - - { id: 13, class: gpr32common, preferred-register: '' } + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr64common, preferred-register: '' } + - { id: 7, class: gpr64common, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr32common, preferred-register: '' } + - { id: 10, class: fpr32, preferred-register: '' } + - { id: 11, class: fpr32, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } + - { id: 13, class: fpr32, preferred-register: '' } - { id: 14, class: gpr32, preferred-register: '' } - - { id: 15, class: gpr32, preferred-register: '' } - - { id: 16, class: gpr32sp, preferred-register: '' } - - { id: 17, class: gpr32, preferred-register: '' } - - { id: 18, class: gpr32, 
preferred-register: '' } - - { id: 19, class: gpr32sp, preferred-register: '' } liveins: - - { reg: '$x0', virtual-reg: '%9' } - - { reg: '$x1', virtual-reg: '%10' } - - { reg: '$x2', virtual-reg: '%11' } - - { reg: '$w3', virtual-reg: '%12' } + - { reg: '$x0', virtual-reg: '%6' } + - { reg: '$x1', virtual-reg: '%7' } + - { reg: '$x2', virtual-reg: '%8' } + - { reg: '$w3', virtual-reg: '%9' } + - { reg: '$s0', virtual-reg: '%10' } frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false @@ -1317,81 +1803,206 @@ constants: [] machineFunctionInfo: {} body: | - ; CHECK-LABEL: name: aliased_store_after_add + ; CHECK-LABEL: name: cant_sink_load_aliased_store ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) - ; CHECK: liveins: $x0, $x1, $x2, $w3 - ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w3 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x2 - ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK: [[COPY3:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv - ; CHECK: Bcc 11, %bb.2, implicit $nzcv - ; CHECK: B %bb.1 + ; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK: liveins: $x0, $x1, $x2, $w3, $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY $w3 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 10, %bb.1, implicit $nzcv + ; CHECK: bb.4: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.2 ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load 4 from %ir.read, !tbaa !0) - ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 - ; CHECK: 
[[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]] - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 43 - ; CHECK: STRWui killed [[MOVi32imm]], [[COPY3]], 0 :: (store 4 into %ir.read, !tbaa !0) + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY4]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: STRSui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.store, !tbaa !0) ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: - ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3 - ; CHECK: STRWui [[PHI]], [[COPY2]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3 + ; CHECK: STRSui [[PHI]], [[COPY3]], 0 :: (store 4 into %ir.write, !tbaa !0) ; CHECK: RET_ReallyLR ; CHECK: bb.3.for.body: ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) - ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY4]], %bb.1, %8, %bb.3 - ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3 - ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3 - ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]] - ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SDIVWr]] - ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv - ; CHECK: [[COPY6:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] - ; CHECK: [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0 - ; CHECK: [[COPY7:%[0-9]+]]:gpr32all = COPY [[ADDWri1]] + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY1]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3 + ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[PHI2]], [[LDRSui]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] ; CHECK: Bcc 0, %bb.2, implicit $nzcv ; CHECK: B %bb.3 bb.0.entry: successors: %bb.1(0x50000000), %bb.2(0x30000000) - liveins: $x0, $x1, $x2, $w3 + liveins: $x0, 
$x1, $x2, $w3, $s0 + + %10:fpr32 = COPY $s0 + %9:gpr32common = COPY $w3 + %8:gpr64common = COPY $x2 + %7:gpr64common = COPY $x1 + %6:gpr64common = COPY $x0 + %11:fpr32 = FMOVS0 + %12:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 - %12:gpr32common = COPY $w3 - %11:gpr64common = COPY $x2 - %10:gpr64common = COPY $x1 - %9:gpr64common = COPY $x0 - %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6) - %15:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv + bb.1.for.body.preheader: + successors: %bb.3(0x80000000) + + %13:fpr32 = FMOVS0 + %0:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6) + STRSui %10, %8, 0 :: (store 4 into %ir.store, !tbaa !6) + B %bb.3 + + bb.2.for.cond.cleanup: + %1:fpr32 = PHI %11, %bb.0, %4, %bb.3 + STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6) + RET_ReallyLR + + bb.3.for.body: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + + %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3 + %3:fpr32 = PHI %13, %bb.1, %4, %bb.3 + %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %3, %0 + %14:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %14 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... 
+--- +name: cant_sink_aliased_store_in_loop +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr64common, preferred-register: '' } + - { id: 7, class: gpr64common, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr32common, preferred-register: '' } + - { id: 10, class: fpr32, preferred-register: '' } + - { id: 11, class: fpr32, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } + - { id: 13, class: fpr32, preferred-register: '' } + - { id: 14, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%6' } + - { reg: '$x1', virtual-reg: '%7' } + - { reg: '$x2', virtual-reg: '%8' } + - { reg: '$w3', virtual-reg: '%9' } + - { reg: '$s0', virtual-reg: '%10' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cant_sink_aliased_store_in_loop + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK: liveins: $x0, $x1, $x2, $w3, $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = 
COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY $w3 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 10, %bb.1, implicit $nzcv + ; CHECK: bb.4: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.2 + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY4]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.3 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.4, %4, %bb.3 + ; CHECK: STRSui [[PHI]], [[COPY3]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: RET_ReallyLR + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY1]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:fpr32 = PHI [[FMOVS0_1]], %bb.1, %4, %bb.3 + ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr [[PHI2]], [[LDRSui]] + ; CHECK: STRSui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.store, !tbaa !0) + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $x0, $x1, $x2, $w3, $s0 + + %10:fpr32 = COPY $s0 + %9:gpr32common = COPY $w3 + %8:gpr64common = COPY $x2 + %7:gpr64common = COPY $x1 + %6:gpr64common = COPY $x0 + %0:fpr32 = LDRSui %6, 0 :: (load 4 from %ir.read, !tbaa !6) + %11:fpr32 = FMOVS0 + %12:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv Bcc 11, %bb.2, implicit $nzcv B %bb.1 bb.1.for.body.preheader: successors: %bb.3(0x80000000) - %16:gpr32sp = 
ADDWri %13, 42, 0 - %1:gpr32all = COPY %16 - %14:gpr32 = MOVi32imm 43 - STRWui killed %14, %9, 0 :: (store 4 into %ir.read, !tbaa !6) + %13:fpr32 = FMOVS0 B %bb.3 bb.2.for.cond.cleanup: - %2:gpr32 = PHI %12, %bb.0, %6, %bb.3 - STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6) + %1:fpr32 = PHI %11, %bb.0, %4, %bb.3 + STRSui %1, %7, 0 :: (store 4 into %ir.write, !tbaa !6) RET_ReallyLR bb.3.for.body: successors: %bb.2(0x04000000), %bb.3(0x7c000000) - %3:gpr32common = PHI %1, %bb.1, %8, %bb.3 - %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3 - %5:gpr32 = PHI %12, %bb.1, %6, %bb.3 - %17:gpr32 = SDIVWr %5, %3 - %6:gpr32all = COPY %17 - %18:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv - %7:gpr32all = COPY %18 - %19:gpr32sp = ADDWri %3, 1, 0 - %8:gpr32all = COPY %19 + %2:gpr32sp = PHI %9, %bb.1, %5, %bb.3 + %3:fpr32 = PHI %13, %bb.1, %4, %bb.3 + %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr %3, %0 + STRSui %10, %8, 0 :: (store 4 into %ir.store, !tbaa !6) + %14:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %14 Bcc 0, %bb.2, implicit $nzcv B %bb.3