Index: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -54,6 +54,8 @@ bool ProcessLoop(MachineLoop *ML); + bool RevertNonLoops(MachineFunction &MF); + void RevertWhile(MachineInstr *MI) const; void RevertLoopDec(MachineInstr *MI) const; @@ -98,9 +100,15 @@ if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Changed |= RevertNonLoops(MF); return Changed; } +static bool IsLoopStart(MachineInstr &MI) { + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2WhileLoopStart; +} + bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; @@ -111,15 +119,10 @@ LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); - auto IsLoopStart = [](MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart || - MI.getOpcode() == ARM::t2WhileLoopStart; - }; - // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart = - [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { + [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (IsLoopStart(MI)) return &MI; @@ -165,6 +168,8 @@ Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) End = &MI; + else if (IsLoopStart(MI)) + Start = &MI; else if (MI.getDesc().isCall()) // TODO: Though the call will require LE to execute again, does this // mean we should revert? 
Always executing LE hopefully should be @@ -190,11 +195,16 @@ break; } + LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; + if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; + if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;); + if (!Start && !Dec && !End) { LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); return Changed; - } if (!(Start && Dec && End)) { - report_fatal_error("Failed to find all loop components"); + } else if (!(Start && Dec && End)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n"); + return false; } if (!End->getOperand(1).isMBB() || @@ -216,10 +226,6 @@ Revert = true; } - LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start - << " - Found Loop Dec: " << *Dec - << " - Found Loop End: " << *End); - Expand(ML, Start, Dec, End, Revert); return true; } @@ -379,6 +385,44 @@ } } +bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); + bool Changed = false; + + for (auto &MBB : MF) { + SmallVector<MachineInstr*, 4> Starts; + SmallVector<MachineInstr*, 4> Decs; + SmallVector<MachineInstr*, 4> Ends; + + for (auto &I : MBB) { + if (IsLoopStart(I)) + Starts.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopDec) + Decs.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopEnd) + Ends.push_back(&I); + } + + if (Starts.empty() && Decs.empty() && Ends.empty()) + continue; + + Changed = true; + + for (auto *Start : Starts) { + if (Start->getOpcode() == ARM::t2WhileLoopStart) + RevertWhile(Start); + else + Start->eraseFromParent(); + } + for (auto *Dec : Decs) + RevertLoopDec(Dec); + + for (auto *End : Ends) + RevertLoopEnd(End); + } + return Changed; +} + FunctionPass *llvm::createARMLowOverheadLoopsPass() { return new ARMLowOverheadLoops(); } Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir =================================================================== --- 
llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir @@ -0,0 +1,170 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s --verify-machineinstrs -o - | FileCheck %s + +# CHECK: name: non_loop +# CHECK: bb.0.entry: +# CHECK: tBcc %bb.2, 3 +# CHECK: tB %bb.1, 14 +# CHECK: bb.1.not.preheader: +# CHECK: t2CMPri $lr, 0, 14 +# CHECK: t2Bcc %bb.3, 0 +# CHECK: tB %bb.2 +# CHECK: bb.2.while.body: +# CHECK: t2CMPri $lr, 0, 14 +# CHECK: t2Bcc %bb.2, 1 +# CHECK: tB %bb.3 +# CHECK: bb.3.while.end: + +--- | + define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { + entry: + %cmp = icmp ugt i32 %N, 2 + br i1 %cmp, label %not.preheader, label %while.body + + not.preheader: ; preds = %entry + %test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) + br i1 %test, label %while.body, label %while.end + + while.body: ; preds = %while.body, %not.preheader, %entry + %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %entry ], [ %a, %not.preheader ] + %b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %entry ], [ %b, %not.preheader ] + %count = phi i32 [ %count.next, %while.body ], [ %N, %entry ], [ %N, %not.preheader ] + %incdec.ptr = getelementptr inbounds i16, i16* %b.addr.05, i32 1 + %load = load i16, i16* %b.addr.05, align 2 + %incdec.ptr1 = getelementptr inbounds i16, i16* %a.addr.06, i32 1 + store i16 %load, i16* %a.addr.06, align 2 + %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) + %cmp1 = icmp ne i32 %count.next, 0 + br i1 %cmp1, label %while.body, label %while.end + + while.end: ; preds = %while.body, %not.preheader + ret void + } + + declare i1 @llvm.test.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... 
+--- +name: non_loop +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 32 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -32, size: 
4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $sp = frame-setup tSUBspi $sp, 6, 14, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 32 + tCMPi8 renamable $r2, 3, 14, $noreg, implicit-def $cpsr + $r3 = tMOVr $r0, 14, $noreg + $r12 = tMOVr $r1, 14, $noreg + $lr = tMOVr $r2, 14, $noreg + tSTRspi killed $r2, $sp, 5, 14, $noreg :: (store 4 into %stack.0) + tSTRspi killed $r1, $sp, 4, 14, $noreg :: (store 4 into %stack.1) + tSTRspi killed $r0, $sp, 3, 14, $noreg :: (store 4 into %stack.2) + tSTRspi killed $r3, $sp, 2, 14, $noreg :: (store 4 into %stack.3) + t2STRi12 killed $r12, $sp, 4, 14, $noreg :: (store 4 into %stack.4) + t2STRi12 killed $lr, $sp, 0, 14, $noreg :: (store 4 into %stack.5) + tBcc %bb.2, 3, $cpsr + tB %bb.1, 14, $noreg + + bb.1.not.preheader: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + $r0 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.2) + $r1 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.1) + $r2 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.0) + $r3 = tLDRspi $sp, 5, 14, $noreg 
:: (load 4 from %stack.0) + tSTRspi killed $r0, $sp, 2, 14, $noreg :: (store 4 into %stack.3) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.4) + tSTRspi killed $r2, $sp, 0, 14, $noreg :: (store 4 into %stack.5) + t2WhileLoopStart killed renamable $r3, %bb.3 + tB %bb.2, 14, $noreg + + bb.2.while.body: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + $r0 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.5) + $r1 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.4) + $r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.3) + renamable $r3, renamable $r1 = t2LDRH_POST renamable $r1, 2, 14, $noreg :: (load 2 from %ir.b.addr.05) + early-clobber renamable $r2 = t2STRH_POST killed renamable $r3, renamable $r2, 2, 14, $noreg :: (store 2 into %ir.a.addr.06) + $lr = tMOVr killed $r0, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + $r0 = tMOVr $lr, 14, $noreg + tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.5) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.4) + tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.3) + t2LoopEnd killed renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + + bb.3.while.end: + $sp = tADDspi $sp, 6, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc + +...