Index: llvm/lib/CodeGen/MachineLICM.cpp =================================================================== --- llvm/lib/CodeGen/MachineLICM.cpp +++ llvm/lib/CodeGen/MachineLICM.cpp @@ -69,11 +69,6 @@ cl::init(false), cl::Hidden); static cl::opt -SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", - cl::desc("MachineLICM should sink instructions into " - "loops to avoid register spills"), - cl::init(false), cl::Hidden); -static cl::opt HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden); @@ -246,8 +241,6 @@ void HoistOutOfLoop(MachineDomTreeNode *HeaderN); - void SinkIntoLoop(); - void InitRegPressure(MachineBasicBlock *BB); DenseMap calcRegisterCost(const MachineInstr *MI, @@ -395,9 +388,6 @@ FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); - - if (SinkInstsToAvoidSpills) - SinkIntoLoop(); } } @@ -787,88 +777,6 @@ } } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. -void MachineLICMBase::SinkIntoLoop() { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - return; - - SmallVector Candidates; - for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); - I != Preheader->instr_end(); ++I) { - // We need to ensure that we can safely move this instruction into the loop. - // As such, it must not have side-effects, e.g. such as a call has. 
- LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I); - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) { - LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n"); - Candidates.push_back(&*I); - continue; - } - LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n"); - } - - for (MachineInstr *I : Candidates) { - const MachineOperand &MO = I->getOperand(0); - if (!MO.isDef() || !MO.isReg() || !MO.getReg()) - continue; - if (!MRI->hasOneDef(MO.getReg())) - continue; - bool CanSink = true; - MachineBasicBlock *SinkBlock = nullptr; - LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I); - - for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump()); - // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop - // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it. 
- if (!MI.isCopy()) { - CanSink = false; - break; - } - if (!SinkBlock) { - SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: " - << printMBBReference(*SinkBlock) << "\n"); - continue; - } - SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n"); - CanSink = false; - break; - } - LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " << - printMBBReference(*SinkBlock) << "\n"); - } - if (!CanSink) { - LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n"); - continue; - } - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n"); - continue; - } - if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n"); - continue; - } - - LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock) - << " from " << printMBBReference(*I->getParent()) - << ": " << *I); - SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); - - // The instruction is moved from its basic block, so do not retain the - // debug information. 
- assert(!I->isDebugInstr() && "Should not sink debug inst"); - I->setDebugLoc(DebugLoc()); - } -} - static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } Index: llvm/lib/CodeGen/MachineSink.cpp =================================================================== --- llvm/lib/CodeGen/MachineSink.cpp +++ llvm/lib/CodeGen/MachineSink.cpp @@ -91,7 +91,14 @@ "the straight line is higher than this threshold."), cl::init(20), cl::Hidden); +static cl::opt +SinkInstsIntoLoop("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into loops to avoid " + "register spills"), + cl::init(false), cl::Hidden); + STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -216,6 +223,11 @@ bool &LocalUse) const; MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); + + void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, @@ -340,6 +352,60 @@ return true; } +/// Return true if this machine instruction loads from global offset table or +/// constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert(MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.
+ if (MI.memoperands_empty()) + return true; + + for (MachineMemOperand *MemOp : MI.memoperands()) + if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) + if (PSV->isGOT() || PSV->isConstantPool()) + return true; + + return false; +} + +void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl &Candidates) { + for (auto &MI : *BB) { + LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + if (!TII->shouldSink(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + "target\n"); + continue; + } + if (!L->isLoopInvariant(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + continue; + } + bool DontMoveAcrossStore = true; + if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + continue; + } + if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + continue; + } + if (MI.isConvergent()) + continue; + + const MachineOperand &MO = MI.getOperand(0); + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + + LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + Candidates.push_back(&MI); + } +} + bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -389,6 +455,29 @@ EverMadeChange = true; } + if (SinkInstsIntoLoop) { + SmallVector Loops(LI->begin(), LI->end()); + for (auto *L : Loops) { + MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + continue; + } + SmallVector Candidates; + FindLoopSinkCandidates(L, Preheader, Candidates); + + // Walk the candidates in reverse order so that we start with the use + // of a def-use chain, if there is any. 
+ for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) { + MachineInstr *I = *It; + if (!SinkIntoLoop(L, *I)) + break; + EverMadeChange = true; + ++NumLoopSunk; + } + } + } + HasStoreCache.clear(); StoreInstrCache.clear(); @@ -1098,6 +1187,73 @@ return HasAliasedStore; } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. +bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + assert(Preheader && "Loop sink needs a preheader block"); + MachineBasicBlock *SinkBlock = nullptr; + bool CanSink = true; + const MachineOperand &MO = I.getOperand(0); + + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); + if (!L->contains(&MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + CanSink = false; + break; + } + + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than keeping its result in a register. + // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) { + LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + CanSink = false; + break; + } + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); + continue; + } + SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + CanSink = false; + break; + } + LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + printMBBReference(*SinkBlock) << "\n"); + } + + if (!CanSink) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + return false; + } + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + return false; + } + if (SinkBlock == Preheader) { + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); + + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!I.isDebugInstr() && "Should not sink debug inst"); + I.setDebugLoc(DebugLoc()); + return true; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. 
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, Index: llvm/test/CodeGen/AArch64/loop-sink.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/loop-sink.mir @@ -0,0 +1,896 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills %s -o - 2>&1 | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + + @A = external dso_local global [100 x i32], align 4 + %struct.A = type { i32, i32, i32, i32, i32, i32 } + + define void @sink_adds(i8* nocapture readonly %input, %struct.A* %a) { + %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1 + %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2 + %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3 + %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4 + %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5 + %scevgep = getelementptr i8, i8* %input, i64 1 + br label %.backedge + + .backedge: ; preds = %.backedge.backedge, %0 + %lsr.iv = phi i8* [ %scevgep1, %.backedge.backedge ], [ %scevgep, %0 ] + %6 = load i8, i8* %lsr.iv, align 1 + %7 = zext i8 %6 to i32 + switch i32 %7, label %.backedge.backedge [ + i32 0, label %8 + i32 10, label %10 + i32 20, label %11 + i32 30, label %12 + i32 40, label %13 + i32 50, label %14 + ] + + 8: ; preds = %.backedge + %9 = bitcast %struct.A* %a to i32* + tail call void @_Z6assignPj(i32* %9) + br label %.backedge.backedge + + 10: ; preds = %.backedge + tail call void @_Z6assignPj(i32* %1) + br label %.backedge.backedge + + 11: ; preds = %.backedge + tail call void @_Z6assignPj(i32* %2) + br label %.backedge.backedge + + 12: ; preds = %.backedge + tail call void @_Z6assignPj(i32* %3) + br label %.backedge.backedge + + 13: ; preds = %.backedge + tail call void 
@_Z6assignPj(i32* %4) + br label %.backedge.backedge + + 14: ; preds = %.backedge + tail call void @_Z6assignPj(i32* %5) + br label %.backedge.backedge + + .backedge.backedge: ; preds = %14, %13, %12, %11, %10, %8, %.backedge + %scevgep1 = getelementptr i8, i8* %lsr.iv, i64 1 + br label %.backedge + } + + define i32 @load_not_safe_to_move_consecutive_call(i32 %n) { + entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + %call0 = tail call i32 @use(i32 %n) + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + ret i32 %sum.0.lcssa + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %div = sdiv i32 %sum.065, %0 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + define i32 @load_not_safe_to_move_consecutive_call_use(i32 %n) { + entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + %call0 = tail call i32 @use(i32 %0) + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + ret i32 %sum.0.lcssa + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %div = sdiv i32 %sum.065, %0 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 
%lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + define i32 @cant_sink_use_outside_loop(i32 %n) { + entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + %use.outside.loop = phi i32 [ 0, %entry ], [ %0, %for.body ] + %call = tail call i32 @use(i32 %use.outside.loop) + ret i32 %sum.0.lcssa + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %div = sdiv i32 %sum.065, %sum.065 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + define i32 @use_is_not_a_copy(i32 %n) { + entry: + %cmp63 = icmp sgt i32 %n, 0 + br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] + ret i32 %sum.0.lcssa + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] + %div = sdiv i32 %sum.065, %0 + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + declare i32 @use(i32) + declare void @_Z6assignPj(i32*) + +... 
+--- +name: sink_adds +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr64all, preferred-register: '' } + - { id: 1, class: gpr64all, preferred-register: '' } + - { id: 2, class: gpr64all, preferred-register: '' } + - { id: 3, class: gpr64all, preferred-register: '' } + - { id: 4, class: gpr64all, preferred-register: '' } + - { id: 5, class: gpr64all, preferred-register: '' } + - { id: 6, class: gpr64sp, preferred-register: '' } + - { id: 7, class: gpr64all, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr64common, preferred-register: '' } + - { id: 10, class: gpr64sp, preferred-register: '' } + - { id: 11, class: gpr64sp, preferred-register: '' } + - { id: 12, class: gpr64sp, preferred-register: '' } + - { id: 13, class: gpr64sp, preferred-register: '' } + - { id: 14, class: gpr64sp, preferred-register: '' } + - { id: 15, class: gpr64sp, preferred-register: '' } + - { id: 16, class: gpr64, preferred-register: '' } + - { id: 17, class: gpr32, preferred-register: '' } + - { id: 18, class: gpr32sp, preferred-register: '' } + - { id: 19, class: gpr32, preferred-register: '' } + - { id: 20, class: gpr64, preferred-register: '' } + - { id: 21, class: gpr64, preferred-register: '' } + - { id: 22, class: gpr64sp, preferred-register: '' } + - { id: 23, class: gpr64sp, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%8' } + - { reg: '$x1', virtual-reg: '%9' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + 
savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.2', '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', + '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.3', '%bb.8', + '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', + '%bb.8', '%bb.8', '%bb.4', '%bb.8', '%bb.8', '%bb.8', + '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', + '%bb.5', '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', + '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.6', '%bb.8', + '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', '%bb.8', + '%bb.8', '%bb.8', '%bb.7' ] +body: | + ; CHECK-LABEL: name: sink_adds + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[ADDXri:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 4, 0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64all = COPY [[ADDXri]] + ; CHECK: [[ADDXri1:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 8, 0 + ; CHECK: [[COPY3:%[0-9]+]]:gpr64all = COPY [[ADDXri1]] + ; CHECK: [[ADDXri2:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 12, 0 + ; CHECK: [[COPY4:%[0-9]+]]:gpr64all = COPY [[ADDXri2]] + ; CHECK: [[ADDXri3:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 16, 0 + ; CHECK: [[COPY5:%[0-9]+]]:gpr64all = COPY [[ADDXri3]] + ; CHECK: [[ADDXri4:%[0-9]+]]:gpr64sp = nuw ADDXri [[COPY]], 20, 0 + ; CHECK: [[COPY6:%[0-9]+]]:gpr64all = COPY [[ADDXri4]] + ; CHECK: [[ADDXri5:%[0-9]+]]:gpr64sp = ADDXri [[COPY1]], 1, 0 + ; CHECK: [[COPY7:%[0-9]+]]:gpr64all = COPY [[ADDXri5]] + ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 + ; CHECK: bb.1..backedge: + ; CHECK: successors: %bb.9(0x09249249), %bb.2(0x76db6db7) + ; CHECK: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY7]], %bb.0, %7, %bb.9 + ; CHECK: 
[[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[PHI]], 0 :: (load 1 from %ir.lsr.iv) + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[LDRBBui]], %subreg.sub_32 + ; CHECK: [[COPY8:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[COPY8]], 50, 0, implicit-def $nzcv + ; CHECK: Bcc 8, %bb.9, implicit $nzcv + ; CHECK: bb.2..backedge: + ; CHECK: successors: %bb.3(0x13b13b14), %bb.9(0x09d89d8a), %bb.4(0x13b13b14), %bb.5(0x13b13b14), %bb.6(0x13b13b14), %bb.7(0x13b13b14), %bb.8(0x13b13b14) + ; CHECK: early-clobber %21:gpr64, early-clobber %22:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0 + ; CHECK: BR killed %21 + ; CHECK: bb.3 (%ir-block.8): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]] + ; CHECK: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.9 + ; CHECK: bb.4 (%ir-block.10): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY2]] + ; CHECK: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.9 + ; CHECK: bb.5 (%ir-block.11): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY3]] + ; CHECK: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.9 + ; CHECK: bb.6 (%ir-block.12): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $x0 = 
COPY [[COPY4]] + ; CHECK: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.9 + ; CHECK: bb.7 (%ir-block.13): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY5]] + ; CHECK: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.9 + ; CHECK: bb.8 (%ir-block.14): + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY6]] + ; CHECK: BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: bb.9..backedge.backedge: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[ADDXri6:%[0-9]+]]:gpr64sp = ADDXri [[PHI]], 1, 0 + ; CHECK: [[COPY9:%[0-9]+]]:gpr64all = COPY [[ADDXri6]] + ; CHECK: B %bb.1 + bb.0 (%ir-block.0): + successors: %bb.1(0x80000000) + liveins: $x0, $x1 + + %9:gpr64common = COPY $x1 + %8:gpr64common = COPY $x0 + %10:gpr64sp = nuw ADDXri %9, 4, 0 + %0:gpr64all = COPY %10 + %11:gpr64sp = nuw ADDXri %9, 8, 0 + %1:gpr64all = COPY %11 + %12:gpr64sp = nuw ADDXri %9, 12, 0 + %2:gpr64all = COPY %12 + %13:gpr64sp = nuw ADDXri %9, 16, 0 + %3:gpr64all = COPY %13 + %14:gpr64sp = nuw ADDXri %9, 20, 0 + %4:gpr64all = COPY %14 + %15:gpr64sp = ADDXri %8, 1, 0 + %5:gpr64all = COPY %15 + %20:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 + + bb.1..backedge: + successors: %bb.8(0x09249249), %bb.9(0x76db6db7) + + %6:gpr64sp = PHI %5, %bb.0, %7, %bb.8 + %17:gpr32 = LDRBBui %6, 0 :: (load 1 from %ir.lsr.iv) + %16:gpr64 = SUBREG_TO_REG 0, killed %17, 
%subreg.sub_32 + %18:gpr32sp = COPY %16.sub_32 + %19:gpr32 = SUBSWri killed %18, 50, 0, implicit-def $nzcv + Bcc 8, %bb.8, implicit $nzcv + + bb.9..backedge: + successors: %bb.2(0x13b13b14), %bb.8(0x09d89d8a), %bb.3(0x13b13b14), %bb.4(0x13b13b14), %bb.5(0x13b13b14), %bb.6(0x13b13b14), %bb.7(0x13b13b14) + + early-clobber %21:gpr64, early-clobber %22:gpr64sp = JumpTableDest32 %20, %16, %jump-table.0 + BR killed %21 + + bb.2 (%ir-block.8): + successors: %bb.8(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY %9 + BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.3 (%ir-block.10): + successors: %bb.8(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY %0 + BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.4 (%ir-block.11): + successors: %bb.8(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY %1 + BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.5 (%ir-block.12): + successors: %bb.8(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY %2 + BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.6 (%ir-block.13): + successors: %bb.8(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY %3 + BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.7 (%ir-block.14): + 
successors: %bb.8(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY %4 + BL @_Z6assignPj, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.8..backedge.backedge: + successors: %bb.1(0x80000000) + + %23:gpr64sp = ADDXri %6, 1, 0 + %7:gpr64all = COPY %23 + B %bb.1 + +... +--- +name: load_not_safe_to_move_consecutive_call +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr32, preferred-register: '' } + - { id: 1, class: gpr32all, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32all, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr32common, preferred-register: '' } + - { id: 7, class: gpr32, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr32, preferred-register: '' } + - { id: 10, class: gpr32all, preferred-register: '' } + - { id: 11, class: gpr32, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%6' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: load_not_safe_to_move_consecutive_call + 
; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $w0 = COPY [[COPY]] + ; CHECK: BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.3 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 + ; CHECK: $w0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3 + ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]] + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $w0 + + %6:gpr32common = COPY $w0 + %7:gpr32 = SUBSWri %6, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1.for.body.preheader: + successors: %bb.3(0x80000000) + + %8:gpr64common = ADRP 
target-flags(aarch64-page) @A + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $w0 = COPY %6 + BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.3 + + bb.2.for.cond.cleanup: + %1:gpr32all = PHI %6, %bb.0, %4, %bb.3 + $w0 = COPY %1 + RET_ReallyLR implicit $w0 + + bb.3.for.body: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + + %2:gpr32sp = PHI %6, %bb.1, %5, %bb.3 + %3:gpr32 = PHI %6, %bb.1, %4, %bb.3 + %11:gpr32 = SDIVWr %3, %9 + %4:gpr32all = COPY %11 + %12:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %12 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... +--- +name: load_not_safe_to_move_consecutive_call_use +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr32, preferred-register: '' } + - { id: 1, class: gpr32all, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32all, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr32common, preferred-register: '' } + - { id: 7, class: gpr32, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr32, preferred-register: '' } + - { id: 10, class: gpr32all, preferred-register: '' } + - { id: 11, class: gpr32, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%6' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + 
hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: load_not_safe_to_move_consecutive_call_use + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $w0 = COPY [[LDRWui]] + ; CHECK: BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: B %bb.3 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 + ; CHECK: $w0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3 + ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]] + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = 
COPY [[SDIVWr]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $w0 + + %6:gpr32common = COPY $w0 + %7:gpr32 = SUBSWri %6, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1.for.body.preheader: + successors: %bb.3(0x80000000) + + %8:gpr64common = ADRP target-flags(aarch64-page) @A + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $w0 = COPY %9 + BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.3 + + bb.2.for.cond.cleanup: + %1:gpr32all = PHI %6, %bb.0, %4, %bb.3 + $w0 = COPY %1 + RET_ReallyLR implicit $w0 + + bb.3.for.body: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + + %2:gpr32sp = PHI %6, %bb.1, %5, %bb.3 + %3:gpr32 = PHI %6, %bb.1, %4, %bb.3 + %11:gpr32 = SDIVWr %3, %9 + %4:gpr32all = COPY %11 + %12:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %12 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... 
+--- +name: cant_sink_use_outside_loop +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr32all, preferred-register: '' } + - { id: 1, class: gpr32all, preferred-register: '' } + - { id: 2, class: gpr32all, preferred-register: '' } + - { id: 3, class: gpr32sp, preferred-register: '' } + - { id: 4, class: gpr32all, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr32all, preferred-register: '' } + - { id: 7, class: gpr32common, preferred-register: '' } + - { id: 8, class: gpr32all, preferred-register: '' } + - { id: 9, class: gpr32all, preferred-register: '' } + - { id: 10, class: gpr32, preferred-register: '' } + - { id: 11, class: gpr64common, preferred-register: '' } + - { id: 12, class: gpr32, preferred-register: '' } + - { id: 13, class: gpr32, preferred-register: '' } + - { id: 14, class: gpr32, preferred-register: '' } + - { id: 15, class: gpr32all, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%7' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cant_sink_use_outside_loop + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 10, 
%bb.1, implicit $nzcv + ; CHECK: bb.4: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY $wzr + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: B %bb.2 + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[LDRWui]] + ; CHECK: B %bb.3 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.4, %5, %bb.5 + ; CHECK: [[PHI1:%[0-9]+]]:gpr32all = PHI [[COPY2]], %bb.4, [[COPY3]], %bb.5 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $w0 = COPY [[PHI1]] + ; CHECK: BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK: $w0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.5(0x04000000), %bb.3(0x7c000000) + ; CHECK: [[PHI2:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %6, %bb.3 + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI2]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: Bcc 1, %bb.3, implicit $nzcv + ; CHECK: bb.5: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[COPY5:%[0-9]+]]:gpr32all = COPY [[MOVi32imm]] + ; CHECK: B %bb.2 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $w0 + + %7:gpr32common = COPY $w0 + %9:gpr32all = COPY $wzr + %8:gpr32all = COPY %9 + %10:gpr32 = SUBSWri %7, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1.for.body.preheader: + 
successors: %bb.3(0x80000000) + + %11:gpr64common = ADRP target-flags(aarch64-page) @A + %12:gpr32 = LDRWui killed %11, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + %0:gpr32all = COPY %12 + B %bb.3 + + bb.2.for.cond.cleanup: + %1:gpr32all = PHI %7, %bb.0, %5, %bb.3 + %2:gpr32all = PHI %8, %bb.0, %0, %bb.3 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $w0 = COPY %2 + BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + $w0 = COPY %1 + RET_ReallyLR implicit $w0 + + bb.3.for.body: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + + %3:gpr32sp = PHI %7, %bb.1, %6, %bb.3 + %13:gpr32 = MOVi32imm 1 + %5:gpr32all = COPY %13 + %14:gpr32 = SUBSWri %3, 1, 0, implicit-def $nzcv + %6:gpr32all = COPY %14 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... +--- +name: use_is_not_a_copy +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr32, preferred-register: '' } + - { id: 1, class: gpr32all, preferred-register: '' } + - { id: 2, class: gpr32sp, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32all, preferred-register: '' } + - { id: 5, class: gpr32all, preferred-register: '' } + - { id: 6, class: gpr32common, preferred-register: '' } + - { id: 7, class: gpr32, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr32, preferred-register: '' } + - { id: 10, class: gpr32, preferred-register: '' } + - { id: 11, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%6' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + 
hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: use_is_not_a_copy + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: B %bb.3 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 + ; CHECK: $w0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) + ; CHECK: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3 + ; CHECK: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3 + ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]] + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]] + ; CHECK: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]] + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $w0 + + %6:gpr32common = 
COPY $w0 + %7:gpr32 = SUBSWri %6, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1.for.body.preheader: + successors: %bb.3(0x80000000) + + %8:gpr64common = ADRP target-flags(aarch64-page) @A + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + B %bb.3 + + bb.2.for.cond.cleanup: + %1:gpr32all = PHI %6, %bb.0, %4, %bb.3 + $w0 = COPY %1 + RET_ReallyLR implicit $w0 + + bb.3.for.body: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + + %2:gpr32sp = PHI %6, %bb.1, %5, %bb.3 + %3:gpr32 = PHI %6, %bb.1, %4, %bb.3 + %10:gpr32 = SDIVWr %3, %9 + %4:gpr32all = COPY %10 + %11:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv + %5:gpr32all = COPY %11 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... Index: llvm/test/CodeGen/X86/sink-cheap-instructions.ll =================================================================== --- llvm/test/CodeGen/X86/sink-cheap-instructions.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK - -; Ensure that we sink copy-like instructions into loops to avoid register -; spills. 
- -; CHECK: Spill -; SINK-NOT: Spill - -%struct.A = type { i32, i32, i32, i32, i32, i32 } - -define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) { - %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0 - %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1 - %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2 - %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3 - %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4 - %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5 - br label %.backedge - -.backedge: - %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ] - %7 = getelementptr inbounds i8, i8* %.0, i64 1 - %8 = load i8, i8* %7, align 1 - switch i8 %8, label %.backedge.backedge [ - i8 0, label %9 - i8 10, label %10 - i8 20, label %11 - i8 30, label %12 - i8 40, label %13 - i8 50, label %14 - ] - -;