Index: lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- lib/Target/ARM/ARMLowOverheadLoops.cpp +++ lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -34,6 +34,7 @@ namespace { class ARMLowOverheadLoops : public MachineFunctionPass { + MachineFunction *MF = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; std::unique_ptr BBUtils = nullptr; @@ -51,9 +52,21 @@ bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return ARM_LOW_OVERHEAD_LOOPS_NAME; + } + + private: bool ProcessLoop(MachineLoop *ML); - bool RevertNonLoops(MachineFunction &MF); + MachineInstr * IsSafeToDefineLR(MachineInstr *MI); + + bool RevertNonLoops(); void RevertWhile(MachineInstr *MI) const; @@ -62,16 +75,9 @@ void RevertLoopEnd(MachineInstr *MI) const; void Expand(MachineLoop *ML, MachineInstr *Start, - MachineInstr *Dec, MachineInstr *End, bool Revert); + MachineInstr *InsertPt, MachineInstr *Dec, + MachineInstr *End, bool Revert); - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - - StringRef getPassName() const override { - return ARM_LOW_OVERHEAD_LOOPS_NAME; - } }; } @@ -80,26 +86,28 @@ INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { - if (!static_cast(MF.getSubtarget()).hasLOB()) +bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { + const ARMSubtarget &ST = static_cast(mf.getSubtarget()); + if (!ST.hasLOB()) return false; - LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n"); + MF = &mf; + LLVM_DEBUG(dbgs() << "ARM Loops on " 
<< MF->getName() << " ------------- \n"); auto &MLI = getAnalysis(); - MRI = &MF.getRegInfo(); - TII = static_cast( - MF.getSubtarget().getInstrInfo()); - BBUtils = std::unique_ptr(new ARMBasicBlockUtils(MF)); + MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + MRI = &MF->getRegInfo(); + TII = static_cast(ST.getInstrInfo()); + BBUtils = std::unique_ptr(new ARMBasicBlockUtils(*MF)); BBUtils->computeAllBlockSizes(); - BBUtils->adjustBBOffsetsAfter(&MF.front()); + BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; for (auto ML : MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } - Changed |= RevertNonLoops(MF); + Changed |= RevertNonLoops(); return Changed; } @@ -108,6 +116,106 @@ MI.getOpcode() == ARM::t2WhileLoopStart; } +template +static MachineInstr* SearchForDef(MachineInstr *Begin, T End, + unsigned Reg) { + for(auto &MI : make_range(T(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +static MachineInstr* SearchForUse(MachineInstr *Begin, + MachineBasicBlock::iterator End, + unsigned Reg) { + for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +// Is it safe to define LR with DLS/WLS? +// LR can be defined if it is the operand to Start, because it's the same value, +// or if it's going to be equivalent to the operand to Start. 
+MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) { + + auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) { + return MI->getOpcode() == ARM::tMOVr && + MI->getOperand(0).getReg() == ARM::LR && + MI->getOperand(1).getReg() == Reg && + MI->getOperand(2).getImm() == ARMCC::AL; + }; + + MachineBasicBlock *MBB = Start->getParent(); + unsigned CountReg = Start->getOperand(0).getReg(); + // Walk forward and backward in the block to find the closest instructions + // that define LR. Then also filter them out if they're not a mov lr. + MachineInstr *PredLRDef = + SearchForDef(Start, MBB->rend(), + ARM::LR); + if (PredLRDef && !IsMoveLR(PredLRDef, CountReg)) + PredLRDef = nullptr; + + MachineInstr *SuccLRDef = + SearchForDef(Start, MBB->end(), + ARM::LR); + if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg)) + SuccLRDef = nullptr; + + // We've either found one, two or no mov lr instructions... Now figure out + // if they are performing the equivalent mov that the Start instruction will. + // Do this by scanning forward and backward to see if there's a def of the + // register holding the count value. If we find a suitable def, return it as + // the insert point. Later, if InsertPt != Start, then we can remove the + // redundant instruction. + if (SuccLRDef) { + MachineBasicBlock::iterator End(SuccLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return SuccLRDef; + } else + SuccLRDef = nullptr; + } + if (PredLRDef) { + MachineBasicBlock::reverse_iterator End(PredLRDef); + if (!SearchForDef(Start, End, + CountReg)) { + return PredLRDef; + } else + PredLRDef = nullptr; + } + + // We can define LR because LR already contains the same value. + if (Start->getOperand(0).getReg() == ARM::LR) + return Start; + + // We've found no suitable LR def and Start doesn't use LR directly. Can we + // just define LR anyway? 
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Not if we haven't found a suitable mov and LR is live out. + if (LiveRegs.contains(ARM::LR)) + return nullptr; + + // If LR is not live out, we can insert the instruction if nothing else + // uses LR after it. + if (!SearchForUse(Start, MBB->end(), ARM::LR)) + return Start; + + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for" + << " LR\n"); + return nullptr; +} + bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; @@ -169,11 +277,13 @@ End = &MI; else if (IsLoopStart(MI)) Start = &MI; - else if (MI.getDesc().isCall()) + else if (MI.getDesc().isCall()) { // TODO: Though the call will require LE to execute again, does this // mean we should revert? Always executing LE hopefully should be // faster than performing a sub,cmp,br or even subs,br. Revert = true; + LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n"); + } if (!Dec || End) continue; @@ -237,7 +347,14 @@ Revert = true; } - Expand(ML, Start, Dec, End, Revert); + MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start); + if (!InsertPt) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); + Revert = true; + } else + LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); + + Expand(ML, Start, InsertPt, Dec, End, Revert); return true; } @@ -304,33 +421,13 @@ } void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, MachineInstr *End, bool Revert) { - auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) { - // The trip count should already been held in LR since the instructions - // within the loop can only read and write to LR. So, there should be a - // mov to setup the count. WLS/DLS perform this move, so find the original - // and delete it - inserting WLS/DLS in its place. 
- MachineBasicBlock *MBB = Start->getParent(); - MachineInstr *InsertPt = Start; - for (auto &I : MRI->def_instructions(ARM::LR)) { - if (I.getParent() != MBB) - continue; - - // Always execute. - if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL) - continue; - - // Only handle move reg, if the trip count it will need moving into a reg - // before the setup instruction anyway. - if (!I.getDesc().isMoveReg() || - !I.getOperand(1).isIdenticalTo(Start->getOperand(0))) - continue; - InsertPt = &I; - break; - } - + auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt) { + MachineBasicBlock *MBB = InsertPt->getParent(); unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ? ARM::t2DLS : ARM::t2WLS; MachineInstrBuilder MIB = @@ -389,18 +486,18 @@ RevertLoopDec(Dec); RevertLoopEnd(End); } else { - Start = ExpandLoopStart(ML, Start); + Start = ExpandLoopStart(ML, Start, InsertPt); RemoveDeadBranch(Start); End = ExpandLoopEnd(ML, Dec, End); RemoveDeadBranch(End); } } -bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) { +bool ARMLowOverheadLoops::RevertNonLoops() { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); bool Changed = false; - for (auto &MBB : MF) { + for (auto &MBB : *MF) { SmallVector Starts; SmallVector Decs; SmallVector Ends; Index: test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir @@ -4,6 +4,9 @@ # CHECK: $lr = t2LEUpdate renamable $lr, %bb.1 --- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { entry: %scevgep = getelementptr i32, i32* %q, i32 -1 @@ -15,10 +18,10 @@ %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] %lsr.iv = phi 
i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] %0 = phi i32 [ %n, %entry ], [ %2, %while.body ] - %scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1 - %scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1 - %1 = load i32, i32* %scevgep7, align 4 - store i32 %1, i32* %scevgep4, align 4 + %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1 + %1 = load i32, i32* %scevgep6, align 4 + store i32 %1, i32* %scevgep2, align 4 %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) @@ -44,7 +47,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -84,6 +87,7 @@ body: | bb.0.entry: successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 @@ -96,9 +100,10 @@ bb.1.while.body: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) - early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14, $noreg @@ -108,4 +113,3 @@ tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 ... 
- Index: test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir @@ -9,7 +9,10 @@ # CHECK: bb.2.for.cond.cleanup: # CHECK: bb.3.for.header: ---- | +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -45,9 +48,11 @@ } ; Function Attrs: nounwind - declare i32 @llvm.arm.space(i32 immarg, i32) #0 + declare i32 @llvm.arm.space(i32 immarg, i32) #0 + ; Function Attrs: noduplicate nounwind - declare void @llvm.set.loop.iterations.i32(i32) #1 + declare void @llvm.set.loop.iterations.i32(i32) #1 + ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 @@ -63,7 +68,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -128,6 +133,7 @@ body: | bb.0.entry: successors: %bb.3(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 @@ -184,5 +190,3 @@ tB %bb.1, 14, $noreg ... 
- - Index: test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir @@ -0,0 +1,115 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s +# CHECK: $lr = t2DLS $r0 +# CHECK-NOT: $lr = tMOVr $r0 +# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1 + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %while.body + + while.body: ; preds = %while.body, %entry + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] + %0 = phi i32 [ %n, %entry ], [ %2, %while.body ] + %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1 + %1 = load i32, i32* %scevgep6, align 4 + store i32 %1, i32* %scevgep2, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + declare void @llvm.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... 
+--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + t2DoLoopStart $r0 + $lr = tMOVr killed $r0, 14, $noreg + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + + bb.1.while.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1 + + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: 
(load 4 from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir @@ -5,8 +5,6 @@ # CHECK-NOT: t2LEUpdate --- | - ; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll' - source_filename = "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll" target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main" @@ -35,15 +33,9 @@ declare i32 @bar(...) 
local_unnamed_addr #0 - ; Function Attrs: noduplicate nounwind declare void @llvm.set.loop.iterations.i32(i32) #1 - - ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #2 - attributes #0 = { "target-features"="+mve.fp" } attributes #1 = { noduplicate nounwind } attributes #2 = { nounwind } @@ -57,7 +49,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -101,6 +93,7 @@ body: | bb.0.entry: successors: %bb.4(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r4, $r5, $r7, $lr frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 16 @@ -112,6 +105,7 @@ bb.1.while.body.preheader: successors: %bb.2(0x80000000) + liveins: $r0 $lr = tMOVr $r0, 14, $noreg renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg @@ -119,6 +113,7 @@ bb.2.while.body: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r4 $r5 = tMOVr killed $lr, 14, $noreg tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 @@ -129,6 +124,8 @@ tB %bb.3, 14, $noreg bb.3.while.end: + liveins: $r4 + $r0 = tMOVr killed $r4, 14, $noreg tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 @@ -138,4 +135,3 @@ tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 ... 
- Index: test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir @@ -4,7 +4,10 @@ # CHECK-NOT: t2DLS # CHECK-NOT: t2LEUpdate ---- | +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + define i32 @mov_between_dec_end(i32 %n) #0 { entry: %cmp6 = icmp eq i32 %n, 0 @@ -15,7 +18,6 @@ br label %while.body while.body: ; preds = %while.body, %while.body.preheader - %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) %add = add i32 %1, 0 @@ -27,10 +29,7 @@ ret i32 %res.0.lcssa } - ; Function Attrs: noduplicate nounwind declare void @llvm.set.loop.iterations.i32(i32) #1 - - ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 attributes #0 = { "target-features"="+mve.fp" } @@ -46,7 +45,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -56,11 +55,11 @@ isReturnAddressTaken: false hasStackMap: false hasPatchPoint: false - stackSize: 16 + stackSize: 8 offsetAdjustment: 0 maxAlignment: 4 - adjustsStack: true - hasCalls: true + adjustsStack: false + hasCalls: false stackProtector: '' maxCallFrameSize: 0 cvBytesOfCalleeSavedRegisters: 0 @@ -78,51 +77,46 @@ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r5', 
callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | bb.0.entry: successors: %bb.4(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r7, $lr - frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r5, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 tCBZ $r0, %bb.4 bb.1.while.body.preheader: successors: %bb.2(0x80000000) + liveins: $r0 $lr = tMOVr $r0, 14, $noreg - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg t2DoLoopStart killed $r0 bb.2.while.body: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r4 - renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg + renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r4, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r4 = tMOVr $lr, 14, $noreg t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg bb.3.while.end: - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + liveins: $lr + + $r0 = tMOVr killed $lr, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 bb.4: - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + 
renamable $lr = t2MOVi 0, 14, $noreg, $noreg + $r0 = tMOVr killed $lr, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 ... - Index: test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir +++ /dev/null @@ -1,136 +0,0 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s - -# CHECK: while.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate - ---- | - define i32 @skip_spill(i32 %n) #0 { - entry: - %cmp6 = icmp eq i32 %n, 0 - br i1 %cmp6, label %while.end, label %while.body.preheader - - while.body.preheader: ; preds = %entry - call void @llvm.set.loop.iterations.i32(i32 %n) - br label %while.body - - while.body: ; preds = %while.body, %while.body.preheader - %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] - %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] - %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() - %add = add nsw i32 %call, %res.07 - %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %2 = icmp ne i32 %1, 0 - br i1 %2, label %while.body, label %while.end - - while.end: ; preds = %while.body, %entry - %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ] - ret i32 %res.0.lcssa - } - - declare i32 @bar(...) local_unnamed_addr #0 - - ; Function Attrs: noduplicate nounwind - declare void @llvm.set.loop.iterations.i32(i32) #1 - - ; Function Attrs: noduplicate nounwind - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #2 - - attributes #0 = { "target-features"="+mve.fp" } - attributes #1 = { noduplicate nounwind } - attributes #2 = { nounwind } - -... 
---- -name: skip_spill -alignment: 2 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: false -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 16 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: true - hasCalls: true - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -callSites: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.4(0x30000000), %bb.1(0x50000000) - - frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 16 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup 
CFI_INSTRUCTION offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r5, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 - tCBZ $r0, %bb.4 - - bb.1.while.body.preheader: - successors: %bb.2(0x80000000) - - $lr = tMOVr $r0, 14, $noreg - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - t2DoLoopStart killed $r0 - - bb.2.while.body: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - - $r5 = tMOVr killed $lr, 14, $noreg - tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 - $lr = tMOVr killed $r5, 14, $noreg - renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr - tB %bb.3, 14, $noreg - - bb.3.while.end: - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - - bb.4: - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - -... 
- Index: test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir @@ -4,7 +4,10 @@ # CHECK-NOT: t2DLS # CHECK-NOT: t2LEUpdate ---- | +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + define i32 @mov_between_dec_end(i32 %n) #0 { entry: %cmp6 = icmp eq i32 %n, 0 @@ -15,7 +18,6 @@ br label %while.body while.body: ; preds = %while.body, %while.body.preheader - %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) %add = add i32 %1, 2 @@ -33,6 +35,9 @@ ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #2 + attributes #0 = { "target-features"="+mve.fp" } attributes #1 = { noduplicate nounwind } attributes #2 = { nounwind } @@ -46,7 +51,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -56,11 +61,11 @@ isReturnAddressTaken: false hasStackMap: false hasPatchPoint: false - stackSize: 16 + stackSize: 8 offsetAdjustment: 0 maxAlignment: 4 - adjustsStack: true - hasCalls: true + adjustsStack: false + hasCalls: false stackProtector: '' maxCallFrameSize: 0 cvBytesOfCalleeSavedRegisters: 0 @@ -78,51 +83,45 @@ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, 
callee-saved-register: '$r5', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | bb.0.entry: successors: %bb.4(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r7, $lr - frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r5, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 tCBZ $r0, %bb.4 bb.1.while.body.preheader: successors: %bb.2(0x80000000) + liveins: $r0 $lr = tMOVr $r0, 14, $noreg - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg t2DoLoopStart killed $r0 bb.2.while.body: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr $r4 = tMOVr $lr, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 + renamable $r0 = t2ADDri renamable $lr, 2, 14, $noreg, $noreg $lr = tMOVr $r4, 14, $noreg t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg bb.3.while.end: - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + liveins: $r0 + + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 bb.4: - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit 
killed $r0 ... - Index: test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir @@ -14,6 +14,9 @@ # CHECK: bb.4.while.end: --- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { entry: %cmp = icmp ugt i32 %N, 2 @@ -23,19 +26,19 @@ %test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) br i1 %test, label %while.body.preheader, label %while.end - while.body.preheader: ; preds = %entry, %not.preheader + while.body.preheader: ; preds = %not.preheader, %entry %scevgep = getelementptr i16, i16* %a, i32 -1 %scevgep3 = getelementptr i16, i16* %b, i32 -1 br label %while.body - while.body: ; preds = %while.body.preheader, %while.body + while.body: ; preds = %while.body, %while.body.preheader %lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [ %scevgep5, %while.body ] %lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1, %while.body ] %count = phi i32 [ %count.next, %while.body ], [ %N, %while.body.preheader ] - %scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1 - %scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1 - %load = load i16, i16* %scevgep6, align 2 - store i16 %load, i16* %scevgep2, align 2 + %scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1 + %scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1 + %load = load i16, i16* %scevgep4, align 2 + store i16 %load, i16* %scevgep7, align 2 %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) %cmp1 = icmp ne i32 %count.next, 0 %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1 @@ -46,13 +49,8 @@ ret void } - ; Function Attrs: noduplicate nounwind declare i1 @llvm.test.set.loop.iterations.i32(i32) #0 - - ; Function Attrs: noduplicate nounwind 
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 - - ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #1 attributes #0 = { noduplicate nounwind } @@ -67,7 +65,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -107,6 +105,7 @@ body: | bb.0.entry: successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r7, $lr frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 @@ -118,21 +117,24 @@ bb.1.not.preheader: successors: %bb.2(0x40000000), %bb.4(0x40000000) + liveins: $lr, $r0, $r1 t2WhileLoopStart renamable $lr, %bb.4, implicit-def dead $cpsr tB %bb.2, 14, $noreg bb.2.while.body.preheader: successors: %bb.3(0x80000000) + liveins: $lr, $r0, $r1 renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg bb.3.while.body: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep6) - early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2) + renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4) + early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr tB %bb.4, 14, $noreg Index: test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir @@ -13,6 +13,9 @@ # 
CHECK-NEXT: tB %bb.3, 14 --- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) #0 { entry: %0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) @@ -23,15 +26,15 @@ %scevgep5 = getelementptr i32, i32* %b, i32 -1 br label %do.body - do.body: ; preds = %do.body.preheader, %do.body + do.body: ; preds = %do.body, %do.body.preheader %lsr.iv6 = phi i32* [ %scevgep5, %do.body.preheader ], [ %scevgep7, %do.body ] %lsr.iv = phi i32* [ %scevgep2, %do.body.preheader ], [ %scevgep3, %do.body ] %1 = phi i32 [ %2, %do.body ], [ %N, %do.body.preheader ] - %scevgep8 = getelementptr i32, i32* %lsr.iv6, i32 1 - %scevgep4 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep = getelementptr i32, i32* %lsr.iv6, i32 1 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 %size = call i32 @llvm.arm.space(i32 4096, i32 undef) - %tmp = load i32, i32* %scevgep8, align 4 - store i32 %tmp, i32* %scevgep4, align 4 + %tmp = load i32, i32* %scevgep, align 4 + store i32 %tmp, i32* %scevgep1, align 4 %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1) %3 = icmp ne i32 %2, 0 %scevgep3 = getelementptr i32, i32* %lsr.iv, i32 1 @@ -51,9 +54,6 @@ ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #1 - attributes #0 = { "target-features"="+lob" } attributes #1 = { nounwind } attributes #2 = { noduplicate nounwind } @@ -67,7 +67,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -107,6 +107,7 @@ body: | bb.0.entry: successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $r1, $r2, $r3, $r7, $lr frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup 
CFI_INSTRUCTION def_cfa_offset 8 @@ -117,6 +118,7 @@ bb.1.do.body.preheader: successors: %bb.2(0x80000000) + liveins: $r1, $r2, $r3 renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg @@ -124,10 +126,11 @@ bb.2.do.body: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r0, $r1 dead renamable $r2 = SPACE 4096, undef renamable $r0 - renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep8) - early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep) + early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep1) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg @@ -136,4 +139,3 @@ tPOP_RET 14, $noreg, def $r7, def $pc ... 
- Index: test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir @@ -0,0 +1,124 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s +# CHECK: $lr = t2DLS $r0 +# CHECK: $lr = tMOVr $r0, 14 +# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 + +# TODO: Explore the preheader to remove the redundant tMOVr + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %preheader + + preheader: + br label %while.body + + while.body: ; preds = %while.body, %preheader + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ] + %0 = phi i32 [ %n, %preheader ], [ %2, %while.body ] + %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1 + %1 = load i32, i32* %scevgep6, align 4 + store i32 %1, i32* %scevgep2, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + declare void @llvm.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +...
+--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + t2DoLoopStart $r0 + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + + bb.1.preheader: + successors: %bb.2(0x80000000) + liveins: $r0 + $lr = tMOVr $r0, 14, $noreg + + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r0, $r1 + + renamable $r2, renamable 
$r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg + + bb.3.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir @@ -0,0 +1,122 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s +# CHECK-NOT: $lr = t2DLS +# CHECK: $lr = tMOVr $r0, 14 +# CHECK-NOT: $lr = t2LEUpdate + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %preheader + + preheader: + br label %while.body + + while.body: ; preds = %while.body, %preheader + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ] + %0 = phi i32 [ %n, %preheader ], [ %2, %while.body ] + %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1 + %1 = load i32, i32* %scevgep6, align 4 + store i32 %1, i32* %scevgep2, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %while.body, label
%while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + declare void @llvm.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... +--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + t2DoLoopStart $r0 + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 
killed renamable $r2, 4, 14, $noreg + + bb.1.preheader: + successors: %bb.2(0x80000000) + liveins: $r0, $lr + $lr = tMOVr $r0, 14, $noreg + + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r0, $r1 + + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg + + bb.3.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir @@ -0,0 +1,122 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s +# CHECK-NOT: $lr = t2DLS +# CHECK: $lr = tMOVr $r0, 14 +# CHECK-NOT: $lr = t2LEUpdate + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %preheader + + preheader: + br label %while.body + + while.body: ; preds = %while.body, %preheader + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ] + %0 = phi i32 [ %n, %preheader ], [ %2, %while.body ] + %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1 + %1 = load i32, i32* %scevgep6, align 4 + store i32
%1, i32* %scevgep2, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + declare void @llvm.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... +--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit $sp + 
frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + t2DoLoopStart $r0 + renamable $r0 = t2SUBri killed renamable $lr, 4, 14, $noreg, def $cpsr + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + + bb.1.preheader: + successors: %bb.2(0x80000000) + liveins: $r0 + $lr = tMOVr $r0, 14, $noreg + + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r0, $r1 + + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg + + bb.3.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir @@ -3,8 +3,6 @@ # CHECK-NOT: WhileLoopStart --- | - ; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.ll' - source_filename = "while-size-limit.ll" target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main" @@ -47,8 +45,10 @@ ; Function Attrs: nounwind declare i32 @llvm.arm.space(i32 immarg, i32) #1 + ; Function Attrs: noduplicate nounwind declare i1 @llvm.test.set.loop.iterations.i32(i32) #2 + ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 @@ -65,7 +65,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true 
hasWinCFI: false registers: [] liveins: @@ -130,6 +130,7 @@ body: | bb.0.entry: successors: %bb.4(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 @@ -192,5 +193,3 @@ tB %bb.2, 14, $noreg ... - - Index: test/CodeGen/Thumb2/LowOverheadLoops/while.mir =================================================================== --- test/CodeGen/Thumb2/LowOverheadLoops/while.mir +++ test/CodeGen/Thumb2/LowOverheadLoops/while.mir @@ -10,8 +10,6 @@ # CHECK: $lr = t2LEUpdate renamable $lr --- | - ; ModuleID = '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll' - source_filename = "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll" target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main" @@ -25,14 +23,14 @@ %scevgep3 = getelementptr i16, i16* %b, i32 -1 br label %while.body - while.body: ; preds = %while.body.preheader, %while.body + while.body: ; preds = %while.body, %while.body.preheader %lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [ %scevgep5, %while.body ] %lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1, %while.body ] %1 = phi i32 [ %3, %while.body ], [ %N, %while.body.preheader ] - %scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1 - %scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1 - %2 = load i16, i16* %scevgep6, align 2, !tbaa !2 - store i16 %2, i16* %scevgep2, align 2, !tbaa !2 + %scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1 + %scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1 + %2 = load i16, i16* %scevgep4, align 2 + store i16 %2, i16* %scevgep7, align 2 %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1) %4 = icmp ne i32 %3, 0 %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1 @@ -48,15 +46,6 @@ attributes #0 = { noduplicate nounwind } attributes #1 = { nounwind } - - !llvm.module.flags = !{!0, 
!1} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, !"min_enum_size", i32 4} - !2 = !{!3, !3, i64 0} - !3 = !{!"short", !4, i64 0} - !4 = !{!"omnipotent char", !5, i64 0} - !5 = !{!"Simple C/C++ TBAA"} ... --- @@ -67,7 +56,7 @@ regBankSelected: false selected: false failedISel: false -tracksRegLiveness: false +tracksRegLiveness: true hasWinCFI: false registers: [] liveins: @@ -107,6 +96,7 @@ body: | bb.0.entry: successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $r0, $r1, $r2, $r7, $lr frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 @@ -117,6 +107,7 @@ bb.1.while.body.preheader: successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2 renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg @@ -124,9 +115,10 @@ bb.2.while.body: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep6, !tbaa !2) - early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2, !tbaa !2) + renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4) + early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg @@ -135,4 +127,3 @@ tPOP_RET 14, $noreg, def $r7, def $pc ... -