Index: llvm/lib/CodeGen/MachineSink.cpp
===================================================================
--- llvm/lib/CodeGen/MachineSink.cpp
+++ llvm/lib/CodeGen/MachineSink.cpp
@@ -227,6 +227,12 @@
   void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);
   bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+  bool IsSafeToMove(MachineLoop *L, MachineInstr &I,
+                    MachineBasicBlock *SinkTo);
+  bool AreAliased(MachineInstr &First, MachineInstr &Second,
+                  MachineBasicBlock *From, MachineBasicBlock *To,
+                  DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                  bool &SawStore, bool &HasAliasedStore);
 
   bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                             MachineBasicBlock *MBB,
@@ -352,24 +358,6 @@
   return true;
 }
 
-/// Return true if this machine instruction loads from global offset table or
-/// constant pool.
-static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
-  assert(MI.mayLoad() && "Expected MI that loads!");
-
-  // If we lost memory operands, conservatively assume that the instruction
-  // reads from everything..
-  if (MI.memoperands_empty())
-    return true;
-
-  for (MachineMemOperand *MemOp : MI.memoperands())
-    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
-      if (PSV->isGOT() || PSV->isConstantPool())
-        return true;
-
-  return false;
-}
-
 void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
     SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
@@ -379,27 +367,28 @@
                            "target\n");
       continue;
     }
+    // If physical registers are used, then the instruction is not marked as
+    // loop invariant. This can be the case if the preheader is the entry
+    // block, and when there are copy instructions of function arguments that
+    // are passed through registers.
     if (!L->isLoopInvariant(MI)) {
       LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
       continue;
     }
-    bool DontMoveAcrossStore = true;
-    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
-      continue;
-    }
-    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
-      continue;
-    }
     if (MI.isConvergent())
       continue;
 
+    // Skip instructions that don't produce values, like branches and certain
+    // store instructions (e.g. stores that don't post-increment).
     const MachineOperand &MO = MI.getOperand(0);
-    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+    if (!MO.isReg() || !MO.getReg() || !MO.isDef()) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not define a value.\n");
       continue;
-    if (!MRI->hasOneDef(MO.getReg()))
+    }
+    if (!MRI->hasOneDef(MO.getReg())) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not have 1 def.\n");
       continue;
+    }
 
     LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
     Candidates.push_back(&MI);
@@ -470,8 +459,13 @@
     // of a def-use chain, if there is any.
     for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
       MachineInstr *I = *It;
+
+      // TODO: This is conservative because we bail as soon as we find one
+      // instruction that cannot be sunk. Better is to do this per def-use
+      // chain, so that we try the next chain if one fails.
       if (!SinkIntoLoop(L, *I))
        break;
+
      EverMadeChange = true;
      ++NumLoopSunk;
    }
@@ -1155,29 +1149,10 @@
       }
 
       for (MachineInstr &I : *BB) {
-        // Treat as alias conservatively for a call or an ordered memory
-        // operation.
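+/// Check whether instruction \p First, which lies on a path between block
+/// \p From and block \p To, may alias with sink candidate \p Second. Calls
+/// and ordered memory operations are conservatively treated as aliasing.
+/// Stores that are encountered set \p SawStore and are recorded in
+/// StoreInstrCache, and the result for the (From, To) block pair is cached
+/// in HasStoreCache so that subsequent queries for the same pair are cheap.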
-        if (I.isCall() || I.hasOrderedMemoryRef()) {
-          for (auto *DomBB : HandledDomBlocks) {
-            if (DomBB != BB && DT->dominates(DomBB, BB))
-              HasStoreCache[std::make_pair(DomBB, To)] = true;
-            else if(DomBB != BB && DT->dominates(BB, DomBB))
-              HasStoreCache[std::make_pair(From, DomBB)] = true;
-          }
-          HasStoreCache[BlockPair] = true;
+        bool Aliased = AreAliased(I, MI, From, To, HandledDomBlocks, SawStore,
+                                  HasAliasedStore);
+        if (Aliased && (I.isCall() || I.hasOrderedMemoryRef()))
           return true;
-        }
-
-        if (I.mayStore()) {
-          SawStore = true;
-          // We still have chance to sink MI if all stores between are not
-          // aliased to MI.
-          // Cache all store instructions, so that we don't need to go through
-          // all From reachable blocks for next load instruction.
-          if (I.mayAlias(AA, MI, false))
-            HasAliasedStore = true;
-          StoreInstrCache[BlockPair].push_back(&I);
-        }
       }
     }
   }
@@ -1187,6 +1162,86 @@
   return HasAliasedStore;
 }
 
+bool MachineSinking::AreAliased(MachineInstr &First, MachineInstr &Second,
+                                MachineBasicBlock *From, MachineBasicBlock *To,
+                                DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                                bool &SawStore, bool &HasAliasedStore) {
+  MachineBasicBlock *BB = First.getParent();
+  auto BlockPair = std::make_pair(From, To);
+
+  // Treat a call or an ordered memory operation as aliasing conservatively,
+  // and cache this result for the relevant block pairs.
+  if (First.isCall() || First.hasOrderedMemoryRef()) {
+    for (auto *DomBB : HandledDomBlocks) {
+      if (DomBB != BB && DT->dominates(DomBB, BB))
+        HasStoreCache[std::make_pair(DomBB, To)] = true;
+      else if (DomBB != BB && DT->dominates(BB, DomBB))
+        HasStoreCache[std::make_pair(From, DomBB)] = true;
+    }
+    HasStoreCache[BlockPair] = true;
+    return true;
+  }
+
+  if (First.mayStore()) {
+    SawStore = true;
+    // We still have a chance to sink MI if all stores between are not
+    // aliased to MI.
+    // Cache all store instructions, so that we don't need to go through
+    // all From reachable blocks for the next load instruction.
+    if (First.mayAlias(AA, Second, false))
+      HasAliasedStore = true;
+    StoreInstrCache[BlockPair].push_back(&First);
+  }
+
+  // If there is no store at all, cache the result.
+  if (!SawStore)
+    HasStoreCache[BlockPair] = false;
+  return HasAliasedStore;
+}
+
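+/// Return true if it is safe to sink loop-invariant instruction \p I from the
+/// preheader of loop \p L into loop block \p SinkTo: \p I must have no
+/// side-effects that forbid moving it, and it must not alias with any
+/// instruction between its current position and \p SinkTo, nor with any
+/// instruction in the loop itself.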
+bool MachineSinking::IsSafeToMove(MachineLoop *L, MachineInstr &I,
+                                  MachineBasicBlock *SinkTo) {
+  if (LI->getLoopFor(SinkTo) != L)
+    return false;
+
+  auto End = I.getParent()->instr_end();
+  auto It = I.getIterator();
+
+  // 1) First, analyse all instructions from the current instruction I to the
+  // end of its block.
+  bool HasAliasedStore = false;
+  bool SawStore = false;
+  ++It;
+  for (; It != End; ++It) {
+    if (AreAliased(*It, I, I.getParent(), SinkTo, {}, SawStore,
+                   HasAliasedStore)) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Alias pair found!\n");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "LoopSink: Current block, not aliased with: " << *It);
+  }
+
+  // This isSafeToMove check is not doing any alias analysis; it checks
+  // instruction types, side-effects, etc. It uses 'SawStore', which is set
+  // in step 1) above while analysing the block of the sink candidate, and
+  // which is updated again in step 2) below, where alias analysis of the
+  // loop blocks is performed.
+  if (!I.isSafeToMove(AA, SawStore)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move!\n");
+    return false;
+  }
+
+  // 2) Next, check all instructions in the loop to see if there are aliases.
+  for (auto *BB : L->blocks()) {
+    for (auto &CurI : *BB) {
+      if (AreAliased(CurI, I, I.getParent(), SinkTo, {}, SawStore,
+                     HasAliasedStore)) {
+        LLVM_DEBUG(dbgs() << "LoopSink: Alias found in a loop block: " << CurI);
+        return false;
+      }
+      LLVM_DEBUG(dbgs() << "LoopSink: Not aliased with loop instruction: " << CurI);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not aliased, safe to move!\n");
+  return true;
+}
+
 /// Sink instructions into loops if profitable. This especially tries to prevent
 /// register spills caused by register pressure if there is little to no
 /// overhead moving instructions into loops.
@@ -1209,12 +1264,7 @@
       // FIXME: Come up with a proper cost model that estimates whether sinking
       // the instruction (and thus possibly executing it on every loop
      // iteration) is more expensive than a register.
-      // For now assumes that copies are cheap and thus almost always worth it.
-      if (!MI.isCopy()) {
-        LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
-        CanSink = false;
-        break;
-      }
+
       if (!SinkBlock) {
         SinkBlock = MI.getParent();
         LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
@@ -1243,6 +1293,10 @@
     LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
     return false;
   }
+  if (!IsSafeToMove(L, I, SinkBlock)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n");
+    return false;
+  }
 
   LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
   SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
Index: llvm/test/CodeGen/AArch64/loop-sink.mir
===================================================================
--- llvm/test/CodeGen/AArch64/loop-sink.mir
+++ llvm/test/CodeGen/AArch64/loop-sink.mir
@@ -6,6 +6,7 @@
 
   @A = external dso_local global [100 x i32], align 4
   %struct.A = type { i32, i32, i32, i32, i32, i32 }
+  @G = external dso_local local_unnamed_addr global i32, align 4
 
   define void @cant_sink_adds_call_in_block(i8* nocapture readonly %input, %struct.A* %a) {
     %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
@@ -151,7 +152,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define dso_local void @sink_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @sink_load_add_chain(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -204,7 +205,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
   }
 
-  define dso_local void @aliased_store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @aliased_store_imm_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -231,9 +232,100 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
   }
 
+  define dso_local void @aliased_store_after_load(i32* noalias nocapture %read, i32* noalias nocapture %write, i32* nocapture readnone %store, i32 %n) local_unnamed_addr #0 {
+  entry:
+    %cmp12 = icmp sgt i32 %n, 0
+    br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    %0 = load i32, i32* %read, align 4, !tbaa !6
+    store i32 %n, i32* %read, align 4, !tbaa !6
+    %1 = add i32 %0, 42
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
+    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
+    %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.013 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
+    %div = sdiv i32 %sum.013, %lsr.iv1
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %lsr.iv.next2 = add i32 %lsr.iv1, 1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+  }
+
+  define dso_local void @cant_sink_multi_block_loop_with_call(i32* noalias nocapture %read, i32* noalias nocapture %write, i32* nocapture readnone %store, i32 %n) local_unnamed_addr #0 {
+  entry:
+    %0 = load i32, i32* %read, align 4
+    store i32 %n, i32* %read, align 4
+    %cmp12 = icmp sgt i32 %n, 0
+    br i1 %cmp12, label %for.body.lr.ph, label %for.cond.cleanup
+
+  for.body.lr.ph:                                   ; preds = %entry
+    %1 = load i32, i32* @G, align 4
+    %2 = icmp eq i32 %1, 0
+    br i1 %2, label %for.body.us.preheader, label %for.body.preheader
+
+  for.body.preheader:                               ; preds = %for.body.lr.ph
+    %3 = add i32 %0, 42
+    br label %for.body
+
+  for.body.us.preheader:                            ; preds = %for.body.lr.ph
+    %4 = add i32 %n, -1
+    %5 = add i32 %0, 42
+    br label %for.body.us
+
+  for.body.us:                                      ; preds = %for.body.us.preheader, %for.inc.us.for.body.us_crit_edge
+    %lsr.iv2 = phi i32 [ %5, %for.body.us.preheader ], [ %lsr.iv.next3, %for.inc.us.for.body.us_crit_edge ]
+    %lsr.iv = phi i32 [ %4, %for.body.us.preheader ], [ %lsr.iv.next, %for.inc.us.for.body.us_crit_edge ]
+    %6 = phi i32 [ %.pre, %for.inc.us.for.body.us_crit_edge ], [ 0, %for.body.us.preheader ]
+    %sum.013.us = phi i32 [ %sum.1.us, %for.inc.us.for.body.us_crit_edge ], [ %n, %for.body.us.preheader ]
+    %tobool.not.us = icmp eq i32 %6, 0
+    br i1 %tobool.not.us, label %if.else.us, label %if.then.us
+
+  if.then.us:                                       ; preds = %for.body.us
+    %div.us = sdiv i32 %sum.013.us, %lsr.iv2
+    br label %for.inc.us
+
+  if.else.us:                                       ; preds = %for.body.us
+    tail call void @H() #2
+    br label %for.inc.us
+
+  for.inc.us:                                       ; preds = %if.else.us, %if.then.us
+    %sum.1.us = phi i32 [ %div.us, %if.then.us ], [ %sum.013.us, %if.else.us ]
+    %exitcond.not = icmp eq i32 %lsr.iv, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.inc.us.for.body.us_crit_edge, !llvm.loop !10
+
+  for.inc.us.for.body.us_crit_edge:                 ; preds = %for.inc.us
+    %.pre = load i32, i32* @G, align 4
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %lsr.iv.next3 = add i32 %lsr.iv2, 1
+    br label %for.body.us
+
+  for.cond.cleanup:                                 ; preds = %for.body, %for.inc.us, %entry
+    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %sum.1.us, %for.inc.us ], [ %div, %for.body ]
+    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv6 = phi i32 [ %3, %for.body.preheader ], [ %lsr.iv.next7, %for.body ]
+    %lsr.iv4 = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next5, %for.body ]
+    %sum.013 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
+    %div = sdiv i32 %sum.013, %lsr.iv6
+    %lsr.iv.next5 = add i32 %lsr.iv4, -1
+    %lsr.iv.next7 = add i32 %lsr.iv6, 1
+    %exitcond17.not = icmp eq i32 %lsr.iv.next5, 0
+    br i1 %exitcond17.not, label %for.cond.cleanup, label %for.body
+  }
 
   declare i32 @use(i32)
   declare void @_Z6assignPj(i32*)
+  declare void @H()
 
   !6 = !{!7, !7, i64 0}
   !7 = !{!"int", !8, i64 0}
@@ -931,8 +1023,6 @@
   ; CHECK:   B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
-  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK:   B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK:   [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
@@ -942,6 +1032,8 @@
   ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
   ; CHECK:   [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
   ; CHECK:   [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+  ; CHECK:   [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
+  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
   ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
@@ -983,7 +1075,7 @@
 
 ...
 ---
-name:            sink_add
+name:            sink_load_add_chain
 alignment:       16
 exposesReturnsTwice: false
 legalized:       false
@@ -1041,7 +1133,7 @@
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: sink_add
+  ; CHECK-LABEL: name: sink_load_add_chain
   ; CHECK: bb.0.entry:
   ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK:   liveins: $x0, $x1, $w2
@@ -1053,9 +1145,6 @@
   ; CHECK:   B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
-  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
-  ; CHECK:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
   ; CHECK:   B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK:   [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
@@ -1063,9 +1152,12 @@
   ; CHECK:   RET_ReallyLR
   ; CHECK: bb.3.for.body:
   ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
-  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.1, %8, %bb.3
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI %1, %bb.1, %8, %bb.3
   ; CHECK:   [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
   ; CHECK:   [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
+  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
   ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
   ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
@@ -1256,7 +1348,7 @@
 
 ...
 ---
-name:            aliased_store_after_add
+name:            aliased_store_imm_after_add
 alignment:       16
 exposesReturnsTwice: false
 legalized:       false
@@ -1317,7 +1409,7 @@
 constants:       []
 machineFunctionInfo: {}
 body:             |
-  ; CHECK-LABEL: name: aliased_store_after_add
+  ; CHECK-LABEL: name: aliased_store_imm_after_add
   ; CHECK: bb.0.entry:
   ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK:   liveins: $x0, $x1, $x2, $w3
@@ -1396,4 +1488,399 @@
     B %bb.3
 
 ...
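+# Check that the load is not sunk into the loop when it is followed by an
+# aliasing store, but that the add consuming the loaded value still is.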
+---
+name:            aliased_store_after_load
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gpr32sp, preferred-register: '' }
+  - { id: 1, class: gpr32all, preferred-register: '' }
+  - { id: 2, class: gpr32, preferred-register: '' }
+  - { id: 3, class: gpr32common, preferred-register: '' }
+  - { id: 4, class: gpr32sp, preferred-register: '' }
+  - { id: 5, class: gpr32, preferred-register: '' }
+  - { id: 6, class: gpr32all, preferred-register: '' }
+  - { id: 7, class: gpr32all, preferred-register: '' }
+  - { id: 8, class: gpr32all, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64common, preferred-register: '' }
+  - { id: 11, class: gpr64, preferred-register: '' }
+  - { id: 12, class: gpr32common, preferred-register: '' }
+  - { id: 13, class: gpr32common, preferred-register: '' }
+  - { id: 14, class: gpr32, preferred-register: '' }
+  - { id: 15, class: gpr32sp, preferred-register: '' }
+  - { id: 16, class: gpr32, preferred-register: '' }
+  - { id: 17, class: gpr32, preferred-register: '' }
+  - { id: 18, class: gpr32sp, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+  - { reg: '$w3', virtual-reg: '%12' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: aliased_store_after_load
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $w3
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 11, %bb.2, implicit $nzcv
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1.for.body.preheader:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   STRWui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.read, !tbaa !0)
+  ; CHECK:   B %bb.3
+  ; CHECK: bb.2.for.cond.cleanup:
+  ; CHECK:   [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
+  ; CHECK:   STRWui [[PHI]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   RET_ReallyLR
+  ; CHECK: bb.3.for.body:
+  ; CHECK:   successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI %1, %bb.1, %8, %bb.3
+  ; CHECK:   [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
+  ; CHECK:   [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
+  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK:   [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0
+  ; CHECK:   [[COPY6:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK:   Bcc 0, %bb.2, implicit $nzcv
+  ; CHECK:   B %bb.3
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.2(0x30000000)
+    liveins: $x0, $x1, $w3
+
+    %12:gpr32common = COPY $w3
+    %10:gpr64common = COPY $x1
+    %9:gpr64common = COPY $x0
+    %14:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x80000000)
+
+    %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRWui %12, %9, 0 :: (store 4 into %ir.read, !tbaa !6)
+    %15:gpr32sp = ADDWri %13, 42, 0
+    %1:gpr32all = COPY %15
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    %2:gpr32 = PHI %12, %bb.0, %6, %bb.3
+    STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+
+  bb.3.for.body:
+    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+
+    %3:gpr32common = PHI %1, %bb.1, %8, %bb.3
+    %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3
+    %5:gpr32 = PHI %12, %bb.1, %6, %bb.3
+    %16:gpr32 = SDIVWr %5, %3
+    %6:gpr32all = COPY %16
+    %17:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv
+    %7:gpr32all = COPY %17
+    %18:gpr32sp = ADDWri %3, 1, 0
+    %8:gpr32all = COPY %18
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.3
+
+...
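+# Check that nothing is sunk into a multi-block loop that contains a call: the
+# call has to be treated conservatively as potentially aliasing. The add is
+# still sunk into the second, single-block loop that has no call.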
+---
+name:            cant_sink_multi_block_loop_with_call
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gpr32sp, preferred-register: '' }
+  - { id: 1, class: gpr32all, preferred-register: '' }
+  - { id: 2, class: gpr32all, preferred-register: '' }
+  - { id: 3, class: gpr32all, preferred-register: '' }
+  - { id: 4, class: gpr32common, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: gpr32, preferred-register: '' }
+  - { id: 8, class: gpr32all, preferred-register: '' }
+  - { id: 9, class: gpr32all, preferred-register: '' }
+  - { id: 10, class: gpr32all, preferred-register: '' }
+  - { id: 11, class: gpr32all, preferred-register: '' }
+  - { id: 12, class: gpr32all, preferred-register: '' }
+  - { id: 13, class: gpr32, preferred-register: '' }
+  - { id: 14, class: gpr32common, preferred-register: '' }
+  - { id: 15, class: gpr32sp, preferred-register: '' }
+  - { id: 16, class: gpr32, preferred-register: '' }
+  - { id: 17, class: gpr32all, preferred-register: '' }
+  - { id: 18, class: gpr32all, preferred-register: '' }
+  - { id: 19, class: gpr32all, preferred-register: '' }
+  - { id: 20, class: gpr64common, preferred-register: '' }
+  - { id: 21, class: gpr64common, preferred-register: '' }
+  - { id: 22, class: gpr64, preferred-register: '' }
+  - { id: 23, class: gpr32common, preferred-register: '' }
+  - { id: 24, class: gpr32common, preferred-register: '' }
+  - { id: 25, class: gpr32, preferred-register: '' }
+  - { id: 26, class: gpr64common, preferred-register: '' }
+  - { id: 27, class: gpr32, preferred-register: '' }
+  - { id: 28, class: gpr32sp, preferred-register: '' }
+  - { id: 29, class: gpr32, preferred-register: '' }
+  - { id: 30, class: gpr32, preferred-register: '' }
+  - { id: 31, class: gpr32sp, preferred-register: '' }
+  - { id: 32, class: gpr32all, preferred-register: '' }
+  - { id: 33, class: gpr32, preferred-register: '' }
+  - { id: 34, class: gpr32sp, preferred-register: '' }
+  - { id: 35, class: gpr32all, preferred-register: '' }
+  - { id: 36, class: gpr32, preferred-register: '' }
+  - { id: 37, class: gpr64common, preferred-register: '' }
+  - { id: 38, class: gpr32, preferred-register: '' }
+  - { id: 39, class: gpr32, preferred-register: '' }
+  - { id: 40, class: gpr32sp, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%20' }
+  - { reg: '$x1', virtual-reg: '%21' }
+  - { reg: '$w3', virtual-reg: '%23' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: cant_sink_multi_block_loop_with_call
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.9(0x30000000)
+  ; CHECK:   liveins: $x0, $x1, $w3
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK:   STRWui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.read, !tbaa !0)
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 11, %bb.9, implicit $nzcv
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1.for.body.lr.ph:
+  ; CHECK:   successors: %bb.3(0x30000000), %bb.2(0x50000000)
+  ; CHECK:   [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @G
+  ; CHECK:   [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !0)
+  ; CHECK:   CBZW killed [[LDRWui1]], %bb.3
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2.for.body.preheader:
+  ; CHECK:   successors: %bb.10(0x80000000)
+  ; CHECK:   B %bb.10
+  ; CHECK: bb.3.for.body.us.preheader:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[SUBSWri]]
+  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK:   [[COPY4:%[0-9]+]]:gpr32all = COPY $wzr
+  ; CHECK:   [[COPY5:%[0-9]+]]:gpr32all = COPY [[COPY4]]
+  ; CHECK:   [[COPY6:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK: bb.4.for.body.us:
+  ; CHECK:   successors: %bb.6(0x30000000), %bb.5(0x50000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY6]], %bb.3, %12, %bb.8
+  ; CHECK:   [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.3, %11, %bb.8
+  ; CHECK:   [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY5]], %bb.3, %10, %bb.8
+  ; CHECK:   [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.3, %9, %bb.8
+  ; CHECK:   CBZW [[PHI2]], %bb.6
+  ; CHECK:   B %bb.5
+  ; CHECK: bb.5.if.then.us:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+  ; CHECK:   B %bb.7
+  ; CHECK: bb.6.if.else.us:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK:   BL @H, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK: bb.7.for.inc.us:
+  ; CHECK:   successors: %bb.9(0x04000000), %bb.8(0x7c000000)
+  ; CHECK:   [[PHI4:%[0-9]+]]:gpr32all = PHI [[COPY7]], %bb.5, [[PHI3]], %bb.6
+  ; CHECK:   CBZW [[PHI1]], %bb.9
+  ; CHECK:   B %bb.8
+  ; CHECK: bb.8.for.inc.us.for.body.us_crit_edge:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[LDRWui2:%[0-9]+]]:gpr32 = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !0)
+  ; CHECK:   [[COPY8:%[0-9]+]]:gpr32all = COPY [[LDRWui2]]
+  ; CHECK:   [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def dead $nzcv
+  ; CHECK:   [[COPY9:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK:   [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
+  ; CHECK:   [[COPY10:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.9.for.cond.cleanup:
+  ; CHECK:   [[PHI5:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %17, %bb.10, [[PHI4]], %bb.7
+  ; CHECK:   STRWui [[PHI5]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK:   RET_ReallyLR
+  ; CHECK: bb.10.for.body:
+  ; CHECK:   successors: %bb.9(0x04000000), %bb.10(0x7c000000)
+  ; CHECK:   [[PHI6:%[0-9]+]]:gpr32common = PHI %1, %bb.2, %19, %bb.10
+  ; CHECK:   [[PHI7:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.2, %18, %bb.10
+  ; CHECK:   [[PHI8:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.2, %17, %bb.10
+  ; CHECK:   [[ADDWri2:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK:   [[COPY11:%[0-9]+]]:gpr32all = COPY [[ADDWri2]]
+  ; CHECK:   [[SDIVWr1:%[0-9]+]]:gpr32 = SDIVWr [[PHI8]], [[PHI6]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:gpr32all = COPY [[SDIVWr1]]
+  ; CHECK:   [[SUBSWri2:%[0-9]+]]:gpr32 = SUBSWri [[PHI7]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[COPY13:%[0-9]+]]:gpr32all = COPY [[SUBSWri2]]
+  ; CHECK:   [[ADDWri3:%[0-9]+]]:gpr32sp = ADDWri [[PHI6]], 1, 0
+  ; CHECK:   [[COPY14:%[0-9]+]]:gpr32all = COPY [[ADDWri3]]
+  ; CHECK:   Bcc 0, %bb.9, implicit $nzcv
+  ; CHECK:   B %bb.10
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.9(0x30000000)
+    liveins: $x0, $x1, $w3
+
+    %23:gpr32common = COPY $w3
+    %21:gpr64common = COPY $x1
+    %20:gpr64common = COPY $x0
+    %24:gpr32common = LDRWui %20, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRWui %23, %20, 0 :: (store 4 into %ir.read, !tbaa !6)
+    %25:gpr32 = SUBSWri %23, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.9, implicit $nzcv
+    B %bb.1
+
+  bb.1.for.body.lr.ph:
+    successors: %bb.3(0x30000000), %bb.2(0x50000000)
+
+    %26:gpr64common = ADRP target-flags(aarch64-page) @G
+    %27:gpr32 = LDRWui %26, target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !6)
+    CBZW killed %27, %bb.3
+    B %bb.2
+
+  bb.2.for.body.preheader:
+    successors: %bb.10(0x80000000)
+
+    %28:gpr32sp = ADDWri %24, 42, 0
+    %1:gpr32all = COPY %28
+    B %bb.10
+
+  bb.3.for.body.us.preheader:
+    successors: %bb.4(0x80000000)
+
+    %2:gpr32all = COPY %25
+    %34:gpr32sp = ADDWri %24, 42, 0
+    %35:gpr32all = COPY $wzr
+    %32:gpr32all = COPY %35
+    %3:gpr32all = COPY %34
+
+  bb.4.for.body.us:
+    successors: %bb.6(0x30000000), %bb.5(0x50000000)
+
+    %4:gpr32common = PHI %3, %bb.3, %12, %bb.8
+    %5:gpr32common = PHI %2, %bb.3, %11, %bb.8
+    %6:gpr32 = PHI %32, %bb.3, %10, %bb.8
+    %7:gpr32 = PHI %23, %bb.3, %9, %bb.8
+    CBZW %6, %bb.6
+    B %bb.5
+
+  bb.5.if.then.us:
+    successors: %bb.7(0x80000000)
+
+    %36:gpr32 = SDIVWr %7, %4
+    %8:gpr32all = COPY %36
+    B %bb.7
+
+  bb.6.if.else.us:
+    successors: %bb.7(0x80000000)
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    BL @H, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+  bb.7.for.inc.us:
+    successors: %bb.9(0x04000000), %bb.8(0x7c000000)
+
+    %9:gpr32all = PHI %8, %bb.5, %7, %bb.6
+    CBZW %5, %bb.9
+    B %bb.8
+
+  bb.8.for.inc.us.for.body.us_crit_edge:
+    successors: %bb.4(0x80000000)
+
+    %38:gpr32 = LDRWui %26, target-flags(aarch64-pageoff, aarch64-nc) @G :: (dereferenceable load 4 from @G, !tbaa !6)
+    %10:gpr32all = COPY %38
+    %39:gpr32 = SUBSWri %5, 1, 0, implicit-def dead $nzcv
+    %11:gpr32all = COPY %39
+    %40:gpr32sp = ADDWri %4, 1, 0
+    %12:gpr32all = COPY %40
+    B %bb.4
+
+  bb.9.for.cond.cleanup:
+    %13:gpr32 = PHI %23, %bb.0, %17, %bb.10, %9, %bb.7
+    STRWui %13, %21, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+
+  bb.10.for.body:
+    successors: %bb.9(0x04000000), %bb.10(0x7c000000)
+
+    %14:gpr32common = PHI %1, %bb.2, %19, %bb.10
+    %15:gpr32sp = PHI %23, %bb.2, %18, %bb.10
+    %16:gpr32 = PHI %23, %bb.2, %17, %bb.10
+    %29:gpr32 = SDIVWr %16, %14
+    %17:gpr32all = COPY %29
+    %30:gpr32 = SUBSWri %15, 1, 0, implicit-def $nzcv
+    %18:gpr32all = COPY %30
+    %31:gpr32sp = ADDWri %14, 1, 0
+    %19:gpr32all = COPY %31
+    Bcc 0, %bb.9, implicit $nzcv
+    B %bb.10
+
+...