Index: llvm/lib/CodeGen/MachineSink.cpp
===================================================================
--- llvm/lib/CodeGen/MachineSink.cpp
+++ llvm/lib/CodeGen/MachineSink.cpp
@@ -227,6 +227,11 @@
   void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);
   bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+  bool IsSafeToMove(MachineInstr &I, MachineBasicBlock *SinkTo);
+  bool AreAliased(MachineInstr &First, MachineInstr &Second,
+                  MachineBasicBlock *From, MachineBasicBlock *To,
+                  DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                  bool &SawStore, bool &HasAliasedStore);
 
   bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                             MachineBasicBlock *MBB,
@@ -352,24 +357,6 @@
   return true;
 }
 
-/// Return true if this machine instruction loads from global offset table or
-/// constant pool.
-static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
-  assert(MI.mayLoad() && "Expected MI that loads!");
-
-  // If we lost memory operands, conservatively assume that the instruction
-  // reads from everything..
-  if (MI.memoperands_empty())
-    return true;
-
-  for (MachineMemOperand *MemOp : MI.memoperands())
-    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
-      if (PSV->isGOT() || PSV->isConstantPool())
-        return true;
-
-  return false;
-}
-
 void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
                                             SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
@@ -379,27 +366,27 @@
                            "target\n");
       continue;
     }
+    // If physical registers are used, then this is marked as not loop
+    // invariant. This can be the case if the preheader is the entry block, and
+    // when there are copy instructions of function arguments that are passed
+    // through registers.
     if (!L->isLoopInvariant(MI)) {
      LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
       continue;
     }
-    bool DontMoveAcrossStore = true;
-    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
-      continue;
-    }
-    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
-      LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
-      continue;
-    }
     if (MI.isConvergent())
       continue;
 
+    // This e.g. skips branch and store instructions.
     const MachineOperand &MO = MI.getOperand(0);
-    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+    if (!MO.isReg() || !MO.getReg() || !MO.isDef()) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not define a value.\n");
       continue;
-    if (!MRI->hasOneDef(MO.getReg()))
+    }
+    if (!MRI->hasOneDef(MO.getReg())) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction does not have 1 def.\n");
       continue;
+    }
 
     LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
     Candidates.push_back(&MI);
@@ -455,6 +442,9 @@
     EverMadeChange = true;
   }
 
+  HasStoreCache.clear();
+  StoreInstrCache.clear();
+
   if (SinkInstsIntoLoop) {
     SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
     for (auto *L : Loops) {
@@ -470,8 +460,13 @@
       // of a def-use chain, if there is any.
       for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
         MachineInstr *I = *It;
+
+        // TODO: This is conservative because we bail as soon as we find one
+        // instruction that cannot be sunk. It would be better to do this per
+        // def-use chain, so that we can try the next chain if one fails.
         if (!SinkIntoLoop(L, *I))
           break;
+
         EverMadeChange = true;
         ++NumLoopSunk;
       }
@@ -1155,29 +1150,10 @@
       }
 
       for (MachineInstr &I : *BB) {
-        // Treat as alias conservatively for a call or an ordered memory
-        // operation.
-        if (I.isCall() || I.hasOrderedMemoryRef()) {
-          for (auto *DomBB : HandledDomBlocks) {
-            if (DomBB != BB && DT->dominates(DomBB, BB))
-              HasStoreCache[std::make_pair(DomBB, To)] = true;
-            else if(DomBB != BB && DT->dominates(BB, DomBB))
-              HasStoreCache[std::make_pair(From, DomBB)] = true;
-          }
-          HasStoreCache[BlockPair] = true;
+        bool Aliased = AreAliased(I, MI, From, To, HandledBlocks, SawStore,
+                                  HasAliasedStore);
+        if (Aliased && (I.isCall() || I.hasOrderedMemoryRef()))
           return true;
-        }
-
-        if (I.mayStore()) {
-          SawStore = true;
-          // We still have chance to sink MI if all stores between are not
-          // aliased to MI.
-          // Cache all store instructions, so that we don't need to go through
-          // all From reachable blocks for next load instruction.
-          if (I.mayAlias(AA, MI, false))
-            HasAliasedStore = true;
-          StoreInstrCache[BlockPair].push_back(&I);
-        }
       }
     }
   }
@@ -1187,6 +1163,79 @@
   return HasAliasedStore;
 }
 
+bool MachineSinking::AreAliased(MachineInstr &First, MachineInstr &Second,
+                                MachineBasicBlock *From, MachineBasicBlock *To,
+                                DenseSet<MachineBasicBlock *> HandledDomBlocks,
+                                bool &SawStore, bool &HasAliasedStore) {
+  MachineBasicBlock *BB = First.getParent();
+  auto BlockPair = std::make_pair(From, To);
+
+  if (First.isCall() || Second.hasOrderedMemoryRef()) {
+    for (auto *DomBB : HandledDomBlocks) {
+      if (DomBB != BB && DT->dominates(DomBB, BB))
+        HasStoreCache[std::make_pair(DomBB, To)] = true;
+      else if(DomBB != BB && DT->dominates(BB, DomBB))
+        HasStoreCache[std::make_pair(From, DomBB)] = true;
+    }
+    HasStoreCache[BlockPair] = true;
+    return true;
+  }
+
+  if (First.mayStore()) {
+    SawStore = true;
+    // We still have chance to sink MI if all stores between are not
+    // aliased to MI.
+    // Cache all store instructions, so that we don't need to go through
+    // all From reachable blocks for next load instruction.
+    if (First.mayAlias(AA, Second, false))
+      HasAliasedStore = true;
+    StoreInstrCache[BlockPair].push_back(&First);
+  }
+
+  // If there is no store at all, cache the result.
+  if (!SawStore)
+    HasStoreCache[BlockPair] = false;
+  return HasAliasedStore;
+}
+
+bool MachineSinking::IsSafeToMove(MachineInstr &I, MachineBasicBlock *SinkTo) {
+  auto End = I.getParent()->instr_end();
+  auto It = I.getIterator();
+  bool SawStore = false;
+  bool HasAliasedStore = false;
+
+  // 1) First, analyse all instructions from the current instruction I to the
+  // end of its block.
+  It++;
+  for (; It != End; ++It) {
+    if (AreAliased(*It, I, I.getParent(), SinkTo, {}, SawStore, HasAliasedStore)) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Alias pair found!\n");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "LoopSink: Not aliased with: " << *It);
+  }
+
+  // 2) Check if we can move I to SinkTo, and see if there are any stores in
+  // between that are aliased.
+  bool DontMoveAcrossStore = hasStoreBetween(I.getParent(), SinkTo, I);
+  LLVM_DEBUG(dbgs() << "LoopSink: Found store in between: "
+                    << DontMoveAcrossStore << "\n");
+  if (!I.isSafeToMove(AA, DontMoveAcrossStore)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n");
+    return false;
+  }
+
+  // 3) Check all instructions in the sink block to see if they alias.
+  for (auto &CurI : *SinkTo) {
+    if (AreAliased(CurI, I, I.getParent(), SinkTo, {}, SawStore, HasAliasedStore)) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Alias found in sink block: " << CurI);
+      return false;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not aliased, safe to move!\n");
+  return true;
+}
+
 /// Sink instructions into loops if profitable. This especially tries to prevent
 /// register spills caused by register pressure if there is little to no
 /// overhead moving instructions into loops.
@@ -1209,12 +1258,7 @@
       // FIXME: Come up with a proper cost model that estimates whether sinking
       // the instruction (and thus possibly executing it on every loop
       // iteration) is more expensive than a register.
-      // For now assumes that copies are cheap and thus almost always worth it.
-      if (!MI.isCopy()) {
-        LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
-        CanSink = false;
-        break;
-      }
+
       if (!SinkBlock) {
         SinkBlock = MI.getParent();
         LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
@@ -1243,6 +1287,10 @@
     LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
     return false;
   }
+  if (!IsSafeToMove(I, SinkBlock)) {
+    LLVM_DEBUG(dbgs() << "LoopSink: Not safe to move\n");
+    return false;
+  }
   LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
   SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
Index: llvm/test/CodeGen/AArch64/loop-sink.mir
===================================================================
--- llvm/test/CodeGen/AArch64/loop-sink.mir
+++ llvm/test/CodeGen/AArch64/loop-sink.mir
@@ -151,7 +151,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
   }
 
-  define dso_local void @sink_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @sink_load_add_chain(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -204,7 +204,7 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
   }
 
-  define dso_local void @aliased_store_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
+  define dso_local void @aliased_store_imm_after_add(i32* noalias nocapture readonly %read, i32* noalias nocapture %write, i32* nocapture %store, i32 %n) local_unnamed_addr #0 {
   entry:
     %0 = load i32, i32* %read, align 4, !tbaa !6
     %cmp10 = icmp sgt i32 %n, 0
@@ -231,6 +231,32 @@
     br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
   }
 
+  define dso_local void @aliased_store_after_load(i32* noalias nocapture %read, i32* noalias nocapture %write, i32* nocapture readnone %store, i32 %n) local_unnamed_addr #0 {
+  entry:
+    %cmp12 = icmp sgt i32 %n, 0
+    br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    %0 = load i32, i32* %read, align 4, !tbaa !6
+    store i32 %n, i32* %read, align 4, !tbaa !6
+    %1 = add i32 %0, 42
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
+    store i32 %sum.0.lcssa, i32* %write, align 4, !tbaa !6
+    ret void
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv1 = phi i32 [ %1, %for.body.preheader ], [ %lsr.iv.next2, %for.body ]
+    %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.013 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
+    %div = sdiv i32 %sum.013, %lsr.iv1
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %lsr.iv.next2 = add i32 %lsr.iv1, 1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+  }
   declare i32 @use(i32)
   declare void @_Z6assignPj(i32*)
 
@@ -336,12 +362,11 @@
   ; CHECK: [[COPY5:%[0-9]+]]:gpr64all = COPY [[ADDXri3]]
   ; CHECK: [[ADDXri4:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 20, 0
   ; CHECK: [[COPY6:%[0-9]+]]:gpr64all = COPY [[ADDXri4]]
-  ; CHECK: [[ADDXri5:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0
-  ; CHECK: [[COPY7:%[0-9]+]]:gpr64all = COPY [[ADDXri5]]
-  ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
   ; CHECK: bb.1..backedge:
   ; CHECK: successors: %bb.9(0x09249249), %bb.2(0x76db6db7)
-  ; CHECK: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY7]], %bb.0, %7, %bb.9
+  ; CHECK: [[PHI:%[0-9]+]]:gpr64sp = PHI %5, %bb.0, %7, %bb.9
+  ; CHECK: [[ADDXri5:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0
+  ; CHECK: [[COPY7:%[0-9]+]]:gpr64all = COPY [[ADDXri5]]
   ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[PHI]], 0 :: (load 1 from %ir.lsr.iv)
   ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[LDRBBui]], %subreg.sub_32
   ; CHECK: [[COPY8:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
@@ -349,6 +374,7 @@
   ; CHECK: Bcc 8, %bb.9, implicit $nzcv
   ; CHECK: bb.2..backedge:
   ; CHECK: successors: %bb.3(0x13b13b14), %bb.9(0x09d89d8a), %bb.4(0x13b13b14), %bb.5(0x13b13b14), %bb.6(0x13b13b14), %bb.7(0x13b13b14), %bb.8(0x13b13b14)
+  ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
   ; CHECK: early-clobber %21:gpr64, early-clobber %22:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
   ; CHECK: BR killed %21
   ; CHECK: bb.3 (%ir-block.8):
@@ -931,8 +957,6 @@
   ; CHECK: B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK: successors: %bb.3(0x80000000)
-  ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
-  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK: B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
@@ -942,6 +966,8 @@
   ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
   ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
   ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+  ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
   ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
   ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
@@ -983,7 +1009,7 @@
 ...
 ---
-name: sink_add
+name: sink_load_add_chain
 alignment: 16
 exposesReturnsTwice: false
 legalized: false
@@ -1041,7 +1067,7 @@
 constants: []
 machineFunctionInfo: {}
 body: |
-  ; CHECK-LABEL: name: sink_add
+  ; CHECK-LABEL: name: sink_load_add_chain
   ; CHECK: bb.0.entry:
   ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK: liveins: $x0, $x1, $w2
@@ -1053,9 +1079,6 @@
   ; CHECK: B %bb.1
   ; CHECK: bb.1.for.body.preheader:
   ; CHECK: successors: %bb.3(0x80000000)
-  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
-  ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
-  ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
   ; CHECK: B %bb.3
   ; CHECK: bb.2.for.cond.cleanup:
   ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
@@ -1063,9 +1086,12 @@
   ; CHECK: RET_ReallyLR
   ; CHECK: bb.3.for.body:
   ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
-  ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.1, %8, %bb.3
+  ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI %1, %bb.1, %8, %bb.3
   ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
   ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
   ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
   ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
   ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
@@ -1256,7 +1282,7 @@
 ...
 ---
-name: aliased_store_after_add
+name: aliased_store_imm_after_add
 alignment: 16
 exposesReturnsTwice: false
 legalized: false
@@ -1317,7 +1343,7 @@
 constants: []
 machineFunctionInfo: {}
 body: |
-  ; CHECK-LABEL: name: aliased_store_after_add
+  ; CHECK-LABEL: name: aliased_store_imm_after_add
   ; CHECK: bb.0.entry:
   ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
   ; CHECK: liveins: $x0, $x1, $x2, $w3
@@ -1396,4 +1422,138 @@
     B %bb.3
 
 ...
+---
+name: aliased_store_after_load
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: gpr32sp, preferred-register: '' }
+  - { id: 1, class: gpr32all, preferred-register: '' }
+  - { id: 2, class: gpr32, preferred-register: '' }
+  - { id: 3, class: gpr32common, preferred-register: '' }
+  - { id: 4, class: gpr32sp, preferred-register: '' }
+  - { id: 5, class: gpr32, preferred-register: '' }
+  - { id: 6, class: gpr32all, preferred-register: '' }
+  - { id: 7, class: gpr32all, preferred-register: '' }
+  - { id: 8, class: gpr32all, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64common, preferred-register: '' }
+  - { id: 11, class: gpr64, preferred-register: '' }
+  - { id: 12, class: gpr32common, preferred-register: '' }
+  - { id: 13, class: gpr32common, preferred-register: '' }
+  - { id: 14, class: gpr32, preferred-register: '' }
+  - { id: 15, class: gpr32sp, preferred-register: '' }
+  - { id: 16, class: gpr32, preferred-register: '' }
+  - { id: 17, class: gpr32, preferred-register: '' }
+  - { id: 18, class: gpr32sp, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+  - { reg: '$w3', virtual-reg: '%12' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 1
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+  ; CHECK-LABEL: name: aliased_store_after_load
+  ; CHECK: bb.0.entry:
+  ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+  ; CHECK: liveins: $x0, $x1, $w3
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w3
+  ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK: Bcc 11, %bb.2, implicit $nzcv
+  ; CHECK: B %bb.1
+  ; CHECK: bb.1.for.body.preheader:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0)
+  ; CHECK: STRWui [[COPY]], [[COPY2]], 0 :: (store 4 into %ir.read, !tbaa !0)
+  ; CHECK: B %bb.3
+  ; CHECK: bb.2.for.cond.cleanup:
+  ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3
+  ; CHECK: STRWui [[PHI]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0)
+  ; CHECK: RET_ReallyLR
+  ; CHECK: bb.3.for.body:
+  ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+  ; CHECK: [[PHI1:%[0-9]+]]:gpr32common = PHI %1, %bb.1, %8, %bb.3
+  ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %7, %bb.3
+  ; CHECK: [[PHI3:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %6, %bb.3
+  ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0
+  ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
+  ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI3]], [[PHI1]]
+  ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+  ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv
+  ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+  ; CHECK: [[ADDWri1:%[0-9]+]]:gpr32sp = ADDWri [[PHI1]], 1, 0
+  ; CHECK: [[COPY6:%[0-9]+]]:gpr32all = COPY [[ADDWri1]]
+  ; CHECK: Bcc 0, %bb.2, implicit $nzcv
+  ; CHECK: B %bb.3
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.2(0x30000000)
+    liveins: $x0, $x1, $w3
+
+    %12:gpr32common = COPY $w3
+    %10:gpr64common = COPY $x1
+    %9:gpr64common = COPY $x0
+    %14:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x80000000)
+
+    %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6)
+    STRWui %12, %9, 0 :: (store 4 into %ir.read, !tbaa !6)
+    %15:gpr32sp = ADDWri %13, 42, 0
+    %1:gpr32all = COPY %15
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    %2:gpr32 = PHI %12, %bb.0, %6, %bb.3
+    STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6)
+    RET_ReallyLR
+  bb.3.for.body:
+    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+
+    %3:gpr32common = PHI %1, %bb.1, %8, %bb.3
+    %4:gpr32sp = PHI %12, %bb.1, %7, %bb.3
+    %5:gpr32 = PHI %12, %bb.1, %6, %bb.3
+    %16:gpr32 = SDIVWr %5, %3
+    %6:gpr32all = COPY %16
+    %17:gpr32 = SUBSWri %4, 1, 0, implicit-def $nzcv
+    %7:gpr32all = COPY %17
+    %18:gpr32sp = ADDWri %3, 1, 0
+    %8:gpr32all = COPY %18
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.3
+
+...