Index: llvm/trunk/lib/CodeGen/HardwareLoops.cpp =================================================================== --- llvm/trunk/lib/CodeGen/HardwareLoops.cpp +++ llvm/trunk/lib/CodeGen/HardwareLoops.cpp @@ -294,6 +294,7 @@ // Check that the icmp is checking for equality of Count and zero and that // a non-zero value results in entering the loop. auto ICmp = cast<ICmpInst>(BI->getCondition()); + LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n"); if (!ICmp->isEquality()) return false; Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2998,6 +2998,16 @@ // Other cases are autogenerated. break; } + case ARMISD::WLS: { + SDValue Ops[] = { N->getOperand(1), // Loop count + N->getOperand(2), // Exit target + N->getOperand(0) }; // Chain + SDNode *LoopStart = + CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops); + ReplaceUses(N, LoopStart); + CurDAG->RemoveDeadNode(N); + return; + } case ARMISD::BRCOND: { // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h @@ -125,6 +125,8 @@ WIN__CHKSTK, // Windows' __chkstk call to do stack probing. WIN__DBZCHK, // Windows' divide by zero check + WLS, // Low-overhead loops, While Loop Start + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal.
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -633,6 +633,10 @@ if (Subtarget->hasMVEIntegerOps()) addMVEVectorTypes(Subtarget->hasMVEFloatOps()); + // Combine low-overhead loop intrinsics so that we can lower i1 types. + if (Subtarget->hasLOB()) + setTargetDAGCombine(ISD::BRCOND); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -1542,6 +1546,7 @@ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; + case ARMISD::WLS: return "ARMISD::WLS"; } return nullptr; } @@ -12883,6 +12888,42 @@ return V; } +static SDValue PerformHWLoopCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *ST) { + // Look for (brcond (xor test.set.loop.iterations, -1)) or the setcc form. + SDValue CC = N->getOperand(1); + + if (CC->getOpcode() != ISD::XOR && CC->getOpcode() != ISD::SETCC) + return SDValue(); + + if (CC->getOperand(0)->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return SDValue(); + + SDValue Int = CC->getOperand(0); + unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue(); + if (IntOp != Intrinsic::test_set_loop_iterations) + return SDValue(); + + // The second operand must be the constant 1; check it without a null deref. + assert((isa<ConstantSDNode>(CC->getOperand(1)) && + cast<ConstantSDNode>(CC->getOperand(1))->isOne()) && + "Expected to compare against 1"); + + SDLoc dl(Int); + SDValue Chain = N->getOperand(0); + SDValue Elements = Int.getOperand(2); + SDValue ExitBlock = N->getOperand(2); + + // TODO: Once we start supporting tail predication, we can add another + // operand to WLS for the number of elements processed in a vector loop.
+ + SDValue Ops[] = { Chain, Elements, ExitBlock }; + SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); + return Res; +} + /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { @@ -13114,6 +13155,7 @@ case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); + case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td @@ -106,6 +106,11 @@ SDTCisInt<0>, SDTCisInt<4>]>; +// TODO Add another operand for 'Size' so that we can re-use this node when we +// start supporting *TP versions. +def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, OtherVT>]>; + def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; @@ -244,6 +249,9 @@ def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; +def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop, + [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // ARM Flag Definitions.
Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td @@ -5216,11 +5216,19 @@ t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, []>, Sched<[WriteBr]>; -let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in +let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in { +def t2WhileLoopStart : + t2PseudoInst<(outs), + (ins rGPR:$elts, brtarget:$target), + 4, IIC_Br, []>, + Sched<[WriteBr]>; + def t2LoopEnd : t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target), 8, IIC_Br, []>, Sched<[WriteBr]>; +} // end isBranch, isTerminator, hasSideEffects + } // end isNotDuplicable class CS opcode, list pattern=[]> Index: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -105,15 +105,20 @@ LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); auto IsLoopStart = [](MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart; + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2WhileLoopStart; }; - auto SearchForStart = - [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr* { + // Search the given block for a loop start instruction. If one isn't found, + // and there's only one predecessor block, search that one too. 
+ std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart = + [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (IsLoopStart(MI)) return &MI; } + if (MBB->pred_size() == 1) + return SearchForStart(*MBB->pred_begin()); return nullptr; }; @@ -122,8 +127,28 @@ MachineInstr *End = nullptr; bool Revert = false; - if (auto *Preheader = ML->getLoopPreheader()) + // Search the preheader for the start intrinsic, or look through the + // predecessors of the header to find exactly one set.iterations intrinsic. + // FIXME: I don't see why we shouldn't be supporting multiple predecessors + // with potentially multiple set.loop.iterations, so we need to enable this. + if (auto *Preheader = ML->getLoopPreheader()) { Start = SearchForStart(Preheader); + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n" + << " - Performing manual predecessor search.\n"); + MachineBasicBlock *Pred = nullptr; + for (auto *MBB : ML->getHeader()->predecessors()) { + if (!ML->contains(MBB)) { + if (Pred) { + LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n"); + Start = nullptr; + break; + } + Pred = MBB; + Start = SearchForStart(MBB); + } + } + } // Find the low-overhead loop components and decide whether or not to fall // back to a normal loop. @@ -158,12 +183,11 @@ break; } - if (Start || Dec || End) { - if (!Start || !Dec || !End) - report_fatal_error("Failed to find all loop components"); - } else { + if (!Start && !Dec && !End) { LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); return Changed; + } else if (!(Start && Dec && End)) { + report_fatal_error("Failed to find all loop components"); } if (!End->getOperand(1).isMBB() || @@ -212,15 +236,21 @@ break; } + unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
+ ARM::t2DLS : ARM::t2WLS; MachineInstrBuilder MIB = - BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(ARM::t2DLS)); - if (InsertPt != Start) - InsertPt->eraseFromParent(); + BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); MIB.add(Start->getOperand(0)); - LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB); + if (Opc == ARM::t2WLS) + MIB.add(Start->getOperand(1)); + + if (InsertPt != Start) + InsertPt->eraseFromParent(); Start->eraseFromParent(); + LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); + return &*MIB; }; // Combine the LoopDec and LoopEnd instructions into LE(TP). @@ -234,24 +264,15 @@ MIB.add(End->getOperand(1)); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB); - // If there is a branch after loop end, which branches to the fallthrough - // block, remove the branch. - MachineBasicBlock *Latch = End->getParent(); - MachineInstr *Terminator = &Latch->instr_back(); - if (End != Terminator) { - MachineBasicBlock *Exit = ML->getExitBlock(); - if (Latch->isLayoutSuccessor(Exit)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: " - << *Terminator); - Terminator->eraseFromParent(); - } - } End->eraseFromParent(); Dec->eraseFromParent(); + return &*MIB; }; // Generate a subs, or sub and cmp, and a branch instead of an LE. // TODO: Check flags so that we can possibly generate a subs. + // FIXME: Need to check that we're not trashing the CPSR when generating + // the cmp. auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n"); // Create sub @@ -282,12 +303,53 @@ Dec->eraseFromParent(); }; + // WhileLoopStart holds the element count and the exit block, so produce a + // cmp of the count against 0 and then a beq that branches to the exit block. + // FIXME: Need to check that we're not trashing the CPSR when generating the + // cmp. We could also try to generate a cbz if the element count is held in + // a low register.
+ auto ExpandStart = [this](MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(ARM::t2CMPri)); + MIB.add(MI->getOperand(0)); // element count + MIB.addImm(0); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::CPSR); + + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MIB.add(MI->getOperand(1)); // branch target + MIB.addImm(ARMCC::EQ); // condition code + MIB.addReg(ARM::CPSR); + }; + + // TODO: We should be able to automatically remove these branches before we + // get here - probably by teaching analyzeBranch about the pseudo + // instructions. + // If there is an unconditional branch, after I, that just branches to the + // next block, remove it. + auto RemoveDeadBranch = [](MachineInstr *I) { + MachineBasicBlock *BB = I->getParent(); + MachineInstr *Terminator = &BB->instr_back(); + if (Terminator->isUnconditionalBranch() && I != Terminator) { + MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); + if (BB->isLayoutSuccessor(Succ)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator); + Terminator->eraseFromParent(); + } + } + }; + if (Revert) { - Start->eraseFromParent(); + if (Start->getOpcode() == ARM::t2WhileLoopStart) + ExpandStart(Start); ExpandBranch(Dec, End); + Start->eraseFromParent(); } else { - ExpandLoopStart(ML, Start); - ExpandLoopEnd(ML, Dec, End); + Start = ExpandLoopStart(ML, Start); + RemoveDeadBranch(Start); + End = ExpandLoopEnd(ML, Dec, End); + RemoveDeadBranch(End); } } Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -806,6 +806,7 @@ default: break; case Intrinsic::set_loop_iterations: + case Intrinsic::test_set_loop_iterations: case Intrinsic::loop_decrement: case Intrinsic::loop_decrement_reg: return true; @@ -841,6 +842,7 @@ LLVMContext
&C = L->getHeader()->getContext(); HWLoopInfo.CounterInReg = true; HWLoopInfo.IsNestingLegal = false; + HWLoopInfo.PerformEntryTest = true; HWLoopInfo.CountType = Type::getInt32Ty(C); HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1); return true; Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir @@ -0,0 +1,115 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s +# CHECK: $lr = tMOVr $r0, 13, $noreg +# CHECK: $lr = t2DLS killed $r0 +# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1 + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %while.body + + while.body: + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] + %0 = phi i32 [ %n, %entry ], [ %2, %while.body ] + %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 1 + %1 = load i32, i32* %scevgep2, align 4 + store i32 %1, i32* %scevgep6, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %while.body, label %while.end + + while.end: + ret i32 0 + } + + declare void @llvm.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + declare void @llvm.stackprotector(i8*, i8**) #1 + + 
attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... +--- +name: do_copy +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $lr = tMOVr $r0, 13, $noreg + t2DoLoopStart killed $r0 + renamable $r0 = t2SUBri killed renamable $r1, 4, 14, $noreg, $noreg + renamable $r1 = t2SUBri killed renamable $r2, 4, 14, $noreg, $noreg + + bb.1.while.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1 + + renamable $r2, renamable $r1 = t2LDR_PRE 
killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep2) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep6) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1 + t2B %bb.2, 14, $noreg + + bb.2.while.end: + $r0 = t2MOVi 0, 14, $noreg, $noreg + $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll @@ -0,0 +1,213 @@ +; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL + +; Not implemented as a mir test so that changes the generic HardwareLoop can +; also be tested. These functions have been taken from +; Transforms/HardwareLoops/loop-guards.ll in which can be seen the generation +; of a few test.set intrinsics, but only one (ne_trip_count) gets generated +; here. Simplifications result in icmps changing and maybe also the CFG. So, +; TODO: Teach the HardwareLoops some better pattern recognition. 
+ +; CHECK-GLOBAL-NOT: DoLoopStart +; CHECK-GLOBAL-NOT: WhileLoopStart +; CHECK-GLOBAL-NOT: LoopEnd + +; CHECK: ne_and_guard +; CHECK: body: +; CHECK: bb.0.entry: +; CHECK: t2CMPri renamable $lr, 0 +; CHECK: tBcc %bb.3 +; CHECK: bb.1.while.body.preheader: +; CHECK: $lr = t2DLS renamable $lr +; CHECK: bb.2.while.body: +; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 +define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { +entry: + %brmerge.demorgan = and i1 %t1, %t2 + %cmp6 = icmp ne i32 %N, 0 + %or.cond = and i1 %brmerge.demorgan, %cmp6 + br i1 %or.cond, label %while.body, label %if.end + +while.body: ; preds = %while.body, %entry + %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ] + %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ] + %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 + %tmp = load i32, i32* %b.addr.07, align 4 + %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 + store i32 %tmp, i32* %a.addr.08, align 4 + %inc = add nuw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %if.end, label %while.body + +if.end: ; preds = %while.body, %entry + ret void +} + +; TODO: This could generate WLS +; CHECK: ne_preheader +; CHECK: body: +; CHECK: bb.0.entry: +; CHECK: t2CMPri renamable $lr, 0 +; CHECK: tBcc %bb.3 +; CHECK: bb.1.while.body.preheader: +; CHECK: $lr = t2DLS renamable $lr +; CHECK: bb.2.while.body: +; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 +define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { +entry: + %brmerge.demorgan = and i1 %t1, %t2 + br i1 %brmerge.demorgan, label %while.preheader, label %if.end + +while.preheader: ; preds = %entry + %cmp = icmp ne i32 %N, 0 + br i1 %cmp, label %while.body, label %if.end + +while.body: ; preds = %while.body, %while.preheader + %i.09 = phi i32 [ 
%inc, %while.body ], [ 0, %while.preheader ] + %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ] + %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ] + %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 + %tmp = load i32, i32* %b.addr.07, align 4 + %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 + store i32 %tmp, i32* %a.addr.08, align 4 + %inc = add nuw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %if.end, label %while.body + +if.end: ; preds = %while.body, %while.preheader, %entry + ret void +} + +; TODO: This could generate WLS +; CHECK: eq_preheader +; CHECK: body: +; CHECK: bb.0.entry: +; CHECK: t2CMPri renamable $lr, 0 +; CHECK: tBcc %bb.3 +; CHECK: bb.1.while.body.preheader: +; CHECK: $lr = t2DLS renamable $lr +; CHECK: bb.2.while.body: +; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 +define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { +entry: + %brmerge.demorgan = and i1 %t1, %t2 + br i1 %brmerge.demorgan, label %while.preheader, label %if.end + +while.preheader: ; preds = %entry + %cmp = icmp eq i32 %N, 0 + br i1 %cmp, label %if.end, label %while.body + +while.body: ; preds = %while.body, %while.preheader + %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ] + %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ] + %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ] + %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 + %tmp = load i32, i32* %b.addr.07, align 4 + %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 + store i32 %tmp, i32* %a.addr.08, align 4 + %inc = add nuw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %if.end, label %while.body + +if.end: ; preds = %while.body, %while.preheader, %entry + ret void +} + +; TODO: This could generate WLS +; CHECK: 
ne_prepreheader +; CHECK: body: +; CHECK: bb.0.entry: +; CHECK: t2CMPri renamable $lr, 0 +; CHECK: tBcc %bb.3 +; CHECK: bb.1.while.body.preheader: +; CHECK: $lr = t2DLS renamable $lr +; CHECK: bb.2.while.body: +; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 +define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { +entry: + %cmp = icmp ne i32 %N, 0 + br i1 %cmp, label %while.preheader, label %if.end + +while.preheader: ; preds = %entry + %brmerge.demorgan = and i1 %t1, %t2 + br i1 %brmerge.demorgan, label %while.body, label %if.end + +while.body: ; preds = %while.body, %while.preheader + %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ] + %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ] + %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ] + %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 + %tmp = load i32, i32* %b.addr.07, align 4 + %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 + store i32 %tmp, i32* %a.addr.08, align 4 + %inc = add nuw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %if.end, label %while.body + +if.end: ; preds = %while.body, %while.preheader, %entry + ret void +} + +; CHECK: be_ne +; CHECK: body: +; CHECK: bb.0.entry: +; CHECK: $lr = t2DLS renamable $lr +; CHECK: bb.1.do.body: +; CHECK: $lr = t2LEUpdate renamable $lr, %bb.1 +define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { +entry: + %cmp = icmp ne i32 %N, 0 + %sub = sub i32 %N, 1 + %be = select i1 %cmp, i32 0, i32 %sub + %cmp.1 = icmp ne i32 %be, 0 + br i1 %cmp.1, label %do.body, label %if.end + +do.body: ; preds = %do.body, %entry + %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ] + %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ] + %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ] + %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1 + %tmp 
= load i32, i32* %b.addr.0, align 4 + %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1 + store i32 %tmp, i32* %a.addr.0, align 4 + %inc = add nuw i32 %i.0, 1 + %cmp.2 = icmp ult i32 %inc, %N + br i1 %cmp.2, label %do.body, label %if.end + +if.end: ; preds = %do.body, %entry + ret void +} + +; TODO: Remove the tMOVr in the preheader! +; CHECK: ne_trip_count +; CHECK: body: +; CHECK: bb.0.entry: +; CHECK: $lr = t2WLS $r3, %bb.3 +; CHECK: bb.1.do.body.preheader: +; CHECK: $lr = tMOVr +; CHECK: bb.2.do.body: +; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 +define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { +entry: + br label %do.body.preheader + +do.body.preheader: + %cmp = icmp ne i32 %N, 0 + br i1 %cmp, label %do.body, label %if.end + +do.body: + %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ] + %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ] + %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ] + %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1 + %tmp = load i32, i32* %b.addr.0, align 4 + %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1 + store i32 %tmp, i32* %a.addr.0, align 4 + %inc = add nuw i32 %i.0, 1 + %cmp.1 = icmp ult i32 %inc, %N + br i1 %cmp.1, label %do.body, label %if.end + +if.end: ; preds = %do.body, %entry + ret void +} Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir @@ -0,0 +1,145 @@ +# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s +# CHECK: for.body: +# CHECK-NOT: t2DLS +# CHECK-NOT: t2LEUpdate + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-unknown" 
+ + ; Function Attrs: norecurse nounwind + define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { + entry: + %cmp8 = icmp eq i32 %N, 0 + br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader + + for.body.preheader: ; preds = %entry + %scevgep = getelementptr i32, i32* %a, i32 -1 + %scevgep4 = getelementptr i32, i32* %c, i32 -1 + %scevgep8 = getelementptr i32, i32* %b, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %N) + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ] + %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ] + %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ] + %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ] + %size = call i32 @llvm.arm.space(i32 4096, i32 undef) + %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 + %1 = load i32, i32* %scevgep11, align 4, !tbaa !3 + %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1 + %2 = load i32, i32* %scevgep7, align 4, !tbaa !3 + %mul = mul nsw i32 %2, %1 + %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1 + store i32 %mul, i32* %scevgep3, align 4, !tbaa !3 + %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1 + %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1 + %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1 + %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %4 = icmp ne i32 %3, 0 + br i1 %4, label %for.body, label %for.cond.cleanup + } + + declare i32 @llvm.arm.space(i32, i32) #1 + declare void @llvm.set.loop.iterations.i32(i32) #2 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 + + attributes #1 = { nounwind } + attributes #2 = { noduplicate nounwind } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = 
!{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"min_enum_size", i32 4} + !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"} + !3 = !{!4, !4, i64 0} + !4 = !{!"int", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + +... +--- +name: massive +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + + frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $r7 = frame-setup tMOVr 
$sp, 14, $noreg + frame-setup CFI_INSTRUCTION def_cfa_register $r7 + tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 8, implicit-def $itstate + tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg + renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg + $lr = tMOVr $r3, 14, $noreg + t2DoLoopStart killed $r3 + + bb.1.for.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + dead renamable $r3 = SPACE 4096, undef renamable $r0 + renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3) + renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3) + renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1 + tB %bb.2, 14, $noreg + + bb.2.for.cond.cleanup: + tPOP_RET 14, $noreg, def $r7, def $pc + +... 
Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir @@ -0,0 +1,160 @@ +# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s +# CHECK: for.body: +# CHECK-NOT: t2DLS +# CHECK-NOT: t2LEUpdate + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-unknown" + + define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { + entry: + %cmp8 = icmp eq i32 %N, 0 + br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader + + for.body.preheader: ; preds = %entry + br label %for.body + + for.cond.cleanup: ; preds = %for.end, %entry + ret void + + for.body: ; preds = %for.body.preheader, %for.end + %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ] + %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ] + %lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ] + %lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ] + %size = call i32 @llvm.arm.space(i32 3072, i32 undef) + %0 = load i32, i32* %lsr.iv4, align 4, !tbaa !3 + %1 = load i32, i32* %lsr.iv2, align 4, !tbaa !3 + %mul = mul nsw i32 %1, %0 + store i32 %mul, i32* %lsr.iv1, align 4, !tbaa !3 + %cmp = icmp ne i32 %0, 0 + br i1 %cmp, label %middle.block, label %for.end + + middle.block: ; preds = %for.body + %div = udiv i32 %1, %0 + store i32 %div, i32* %lsr.iv1, align 4, !tbaa !3 + %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef) + br label %for.end + + for.end: ; preds = %middle.block, %for.body + %lsr.iv.next = add i32 %lsr.iv, -1 + %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1 + 
%scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %exitcond = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + } + + declare i32 @llvm.arm.space(i32, i32) #1 + attributes #1 = { nounwind } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"min_enum_size", i32 4} + !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"} + !3 = !{!4, !4, i64 0} + !4 = !{!"int", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + +... +--- +name: size_limit +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: -8 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', 
debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.3(0x50000000) + + frame-setup tPUSH 14, $noreg, killed $r4, killed $r6, $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + frame-setup CFI_INSTRUCTION offset $r6, -12 + frame-setup CFI_INSTRUCTION offset $r4, -16 + $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg + frame-setup CFI_INSTRUCTION def_cfa $r7, 8 + tCBNZ $r3, %bb.3 + + bb.1.for.cond.cleanup: + tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc + + bb.2.for.end: + successors: %bb.1(0x04000000), %bb.3(0x7c000000) + + renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg + renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg + renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg + renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 1, 14, $noreg + tBcc %bb.1, 0, killed $cpsr + + bb.3.for.body: + successors: %bb.4(0x50000000), %bb.2(0x30000000) + + dead renamable $r12 = SPACE 3072, undef renamable $r0 + renamable $r12 = t2LDRi12 renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4, !tbaa !3) + renamable $lr = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2, !tbaa !3) + t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr + renamable $r4 = nsw t2MUL renamable $lr, renamable $r12, 14, $noreg + tSTRi killed 
renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3) + t2Bcc %bb.2, 0, killed $cpsr + + bb.4.middle.block: + successors: %bb.2(0x80000000) + + renamable $r4 = t2UDIV killed renamable $lr, killed renamable $r12, 14, $noreg + tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3) + dead renamable $r4 = SPACE 1024, undef renamable $r0 + t2B %bb.2, 14, $noreg + +... Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir @@ -0,0 +1,130 @@ +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s + +# CHECK: while.body: +# CHECK-NOT: t2DLS +# CHECK-NOT: t2LEUpdate + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define i32 @skip_spill(i32 %n) #0 { + entry: + %cmp6 = icmp eq i32 %n, 0 + br i1 %cmp6, label %while.end, label %while.body.preheader + + while.body.preheader: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %while.body + + while.body: ; preds = %while.body, %while.body.preheader + %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] + %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] + %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call, %res.07 + %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %2 = icmp ne i32 %1, 0 + br i1 %2, label %while.body, label %while.end + + while.end: ; preds = %while.body, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ] + ret i32 %res.0.lcssa + } + + declare i32 @bar(...) 
local_unnamed_addr #0 + declare void @llvm.set.loop.iterations.i32(i32) #1 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 + + attributes #0 = { "target-features"="+mve.fp" } + attributes #1 = { noduplicate nounwind } + attributes #2 = { nounwind } + +... +--- +name: skip_spill +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: 
%bb.4(0x30000000), %bb.1(0x50000000) + + frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + frame-setup CFI_INSTRUCTION offset $r5, -12 + frame-setup CFI_INSTRUCTION offset $r4, -16 + tCBZ $r0, %bb.4 + + bb.1.while.body.preheader: + successors: %bb.2(0x80000000) + + $lr = tMOVr $r0, 14, $noreg + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg + t2DoLoopStart killed $r0 + + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + $r5 = tMOVr killed $lr, 14, $noreg + tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + $lr = tMOVr killed $r5, 14, $noreg + renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + + bb.3.while.end: + $r0 = tMOVr killed $r4, 14, $noreg + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + + bb.4: + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg + $r0 = tMOVr killed $r4, 14, $noreg + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + +... 
Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir @@ -0,0 +1,130 @@ +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s + +# CHECK: while.body: +# CHECK-NOT: t2DLS +# CHECK-NOT: t2LEUpdate + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define i32 @skip_spill(i32 %n) #0 { + entry: + %cmp6 = icmp eq i32 %n, 0 + br i1 %cmp6, label %while.end, label %while.body.preheader + + while.body.preheader: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %n) + br label %while.body + + while.body: ; preds = %while.body, %while.body.preheader + %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] + %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] + %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call, %res.07 + %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %2 = icmp ne i32 %1, 0 + br i1 %2, label %while.body, label %while.end + + while.end: ; preds = %while.body, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ] + ret i32 %res.0.lcssa + } + + declare i32 @bar(...) local_unnamed_addr #0 + declare void @llvm.set.loop.iterations.i32(i32) #1 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 + + attributes #0 = { "target-features"="+mve.fp" } + attributes #1 = { noduplicate nounwind } + attributes #2 = { nounwind } + +... 
+--- +name: skip_spill +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.4(0x30000000), %bb.1(0x50000000) + + frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION 
offset $r7, -8 + frame-setup CFI_INSTRUCTION offset $r5, -12 + frame-setup CFI_INSTRUCTION offset $r4, -16 + tCBZ $r0, %bb.4 + + bb.1.while.body.preheader: + successors: %bb.2(0x80000000) + + $lr = tMOVr $r0, 14, $noreg + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg + t2DoLoopStart killed $r0 + + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + $r5 = tMOVr killed $lr, 14, $noreg + tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + $lr = tMOVr killed $r5, 14, $noreg + renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + + bb.3.while.end: + $r0 = tMOVr killed $r4, 14, $noreg + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + + bb.4: + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg + $r0 = tMOVr killed $r4, 14, $noreg + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + +... 
Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir @@ -0,0 +1,130 @@ +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s +# CHECK: body: +# CHECK: bb.0.entry: +# CHECK: t2CMPri $lr, 0, 14 +# CHECK-NEXT: t2Bcc %bb.3, 0, $cpsr +# CHECK-NEXT: tB %bb.1 +# CHECK: bb.1.do.body.preheader: +# CHECK: $lr = tMOVr killed $r3 +# CHECK: bb.2.do.body: +# CHECK: $lr = t2SUBri killed renamable $lr, 1, 14 +# CHECK-NEXT: t2CMPri $lr, 0, 14, $cpsr +# CHECK-NEXT: t2Bcc %bb.2, 1, $cpsr +# CHECK-NEXT: tB %bb.3, 14 +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main" + + define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) #0 { + entry: + %cmp = icmp ne i32 %N, 0 + %0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) + br i1 %0, label %do.body.preheader, label %if.end + + do.body.preheader: ; preds = %entry + br label %do.body + + do.body: ; preds = %do.body.preheader, %do.body + %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ] + %1 = phi i32 [ %N, %do.body.preheader ], [ %2, %do.body ] + %scevgep = getelementptr i32, i32* %b, i32 %i.0 + %scevgep1 = getelementptr i32, i32* %a, i32 %i.0 + %size = call i32 @llvm.arm.space(i32 4096, i32 undef) + %tmp = load i32, i32* %scevgep, align 4 + store i32 %tmp, i32* %scevgep1, align 4 + %inc = add nuw i32 %i.0, 1 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %do.body, label %if.end + + if.end: ; preds = %do.body, %entry + ret void + } + + declare i32 @llvm.arm.space(i32, i32) #1 + declare i1 @llvm.test.set.loop.iterations.i32(i32) #2 + declare i32 
@llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 + + attributes #0 = { "target-features"="+lob" } + attributes #1 = { nounwind "target-features"="+lob" } + attributes #2 = { noduplicate nounwind } + attributes #3 = { nounwind } + +... +--- +name: ne_trip_count +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + t2WhileLoopStart $r3, %bb.3 + tB %bb.1, 14, $noreg + + bb.1.do.body.preheader: + successors: %bb.2(0x80000000) + + $lr = tMOVr killed $r3, 14, $noreg + 
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg + + bb.2.do.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + dead renamable $r3 = SPACE 4096, undef renamable $r0 + renamable $r3 = t2LDRs renamable $r2, renamable $r0, 2, 14, $noreg :: (load 4 from %ir.scevgep) + t2STRs killed renamable $r3, renamable $r1, renamable $r0, 2, 14, $noreg :: (store 4 into %ir.scevgep1) + renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + + bb.3.if.end: + tPOP_RET 14, $noreg, def $r7, def $pc + +... Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir @@ -0,0 +1,155 @@ +# RUN: llc -mtriple=armv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s +# CHECK: entry: +# CHECK: $lr = t2DLS +# CHECK: for.body: +# CHECK: $lr = t2LEUpdate renamable $lr + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-unknown" + + ; Function Attrs: norecurse nounwind + define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { + entry: + %cmp8 = icmp eq i32 %N, 0 + br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader + + for.body.preheader: ; preds = %entry + %scevgep = getelementptr i32, i32* %a, i32 -1 + %scevgep4 = getelementptr i32, i32* %c, i32 -1 + %scevgep8 = getelementptr i32, i32* %b, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %N) + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, 
%for.body ] + %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ] + %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ] + %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ] + %size = call i32 @llvm.arm.space(i32 4072, i32 undef) + %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 + %1 = load i32, i32* %scevgep11, align 4, !tbaa !3 + %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1 + %2 = load i32, i32* %scevgep7, align 4, !tbaa !3 + %mul = mul nsw i32 %2, %1 + %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1 + store i32 %mul, i32* %scevgep3, align 4, !tbaa !3 + %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1 + %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1 + %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1 + %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %4 = icmp ne i32 %3, 0 + br i1 %4, label %for.body, label %for.cond.cleanup + } + + ; Function Attrs: nounwind + declare i32 @llvm.arm.space(i32, i32) #1 + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #2 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" 
"target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } + attributes #1 = { nounwind } + attributes #2 = { noduplicate nounwind } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"min_enum_size", i32 4} + !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"} + !3 = !{!4, !4, i64 0} + !4 = !{!"int", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + +... +--- +name: size_limit +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + 
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + + frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $r7 = frame-setup tMOVr $sp, 14, $noreg + frame-setup CFI_INSTRUCTION def_cfa_register $r7 + tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 8, implicit-def $itstate + tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg + renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg + $lr = tMOVr $r3, 14, $noreg + t2DoLoopStart killed $r3 + + bb.1.for.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + dead renamable $r3 = SPACE 4072, undef renamable $r0 + renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3) + renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3) + renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1 + tB %bb.2, 14, $noreg + + bb.2.for.cond.cleanup: + tPOP_RET 14, $noreg, def $r7, def $pc + +... 
Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir @@ -0,0 +1,198 @@ +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops -o - +# CHECK: bb.1.for.body.preheader: +# CHECK: $lr = t2DLS +# CHECK-NOT: t2LoopDec +# CHECK: bb.6.for.inc: +# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-unknown" + + ; Function Attrs: norecurse nounwind readonly + define dso_local arm_aapcscc i32 @search(i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { + entry: + %cmp11 = icmp eq i32 %N, 0 + br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader + + for.body.preheader: + call void @llvm.set.loop.iterations.i32(i32 %N) + br label %for.body + + for.cond.cleanup: + %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ] + %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ] + %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa + ret i32 %sub + + for.body: + %lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ] + %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ] + %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ] + %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ] + %1 = load i8, i8* %lsr.iv1, align 1 + %2 = zext i8 %1 to i32 + switch i32 %2, label %for.inc [ + i32 108, label %sw.bb + i32 111, label %sw.bb + i32 112, label %sw.bb + i32 32, label %sw.bb1 + ] + + sw.bb: + %inc = add nsw i32 %found.012, 1 + br label %for.inc + + sw.bb1: + %inc2 = add nsw i32 %spaces.013, 1 + br label %for.inc + + for.inc: + %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ] + %spaces.1 = phi i32 [ %spaces.013, %for.body 
], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ] + %scevgep = getelementptr i8, i8* %lsr.iv1, i32 1 + %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %4 = icmp ne i32 %3, 0 + br i1 %4, label %for.body, label %for.cond.cleanup + } + + declare void @llvm.set.loop.iterations.i32(i32) #1 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } + attributes #1 = { noduplicate nounwind } + attributes #2 = { nounwind } + +... 
+--- +name: search +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: -8 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.3(0x50000000) + liveins: $r0, $r1, $r4, $r6, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r6, $r7, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION 
offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + frame-setup CFI_INSTRUCTION offset $r6, -12 + frame-setup CFI_INSTRUCTION offset $r4, -16 + $r7 = frame-setup t2ADDri $sp, 8, 14, $noreg, $noreg + frame-setup CFI_INSTRUCTION def_cfa $r7, 8 + t2CMPri $r1, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.1, 0, killed $cpsr + + bb.3.for.body.preheader: + successors: %bb.4(0x80000000) + liveins: $r0, $r1 + + $lr = tMOVr $r1, 14, $noreg + t2DoLoopStart killed $r1 + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + renamable $r12 = t2MOVi 1, 14, $noreg, $noreg + renamable $r2 = t2MOVi 0, 14, $noreg, $noreg + + bb.4.for.body: + successors: %bb.5(0x26666665), %bb.6(0x5999999b) + liveins: $lr, $r0, $r1, $r2, $r12 + + renamable $r3 = t2LDRBi12 renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1) + renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2CMPri renamable $r4, 4, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.5, 8, killed $cpsr + + bb.6.for.body: + successors: %bb.7(0x6db6db6e), %bb.5(0x12492492) + liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r12 + + renamable $r4 = t2LSLrr renamable $r12, killed renamable $r4, 14, $noreg, $noreg + t2TSTri killed renamable $r4, 25, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.5, 0, killed $cpsr + + bb.7.sw.bb: + successors: %bb.8(0x80000000) + liveins: $lr, $r0, $r1, $r2, $r12 + + renamable $r2 = nsw t2ADDri killed renamable $r2, 1, 14, $noreg, $noreg + t2B %bb.8, 14, $noreg + + bb.5.for.body: + successors: %bb.8(0x80000000) + liveins: $lr, $r0, $r1, $r2, $r3, $r12 + + t2CMPri killed renamable $r3, 32, 14, $noreg, implicit-def $cpsr + BUNDLE implicit-def dead $itstate, implicit-def $r1, implicit killed $r1, implicit killed $cpsr { + t2IT 0, 8, implicit-def $itstate + renamable $r1 = nsw t2ADDri killed renamable $r1, 1, 0, killed $cpsr, $noreg, implicit $r1, implicit internal killed $itstate + } + + bb.8.for.inc: + successors: %bb.4(0x7c000000), %bb.2(0x04000000) + 
liveins: $lr, $r0, $r1, $r2, $r12 + + renamable $r0 = t2ADDri killed renamable $r0, 1, 14, $noreg, $noreg + t2LoopEnd renamable $lr, %bb.4 + t2B %bb.2, 14, $noreg + + bb.2.for.cond.cleanup: + liveins: $r1, $r2 + + renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg + $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0 + + bb.1: + renamable $r2 = t2MOVi 0, 14, $noreg, $noreg + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg + $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0 + +... Index: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir @@ -0,0 +1,131 @@ +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s + +# TODO: Remove the lr = tMOVr +# CHECK: body: +# CHECK: $lr = t2WLS $r2, [[EXIT:%bb[.0-9]+]] +# CHECK: [[PREHEADER:bb[.0-9a-z]+]]: +# CHECK: $lr = tMOVr killed $r2 +# CHECK: [[BODY:bb[.0-9a-z]+]]: +# CHECK: $lr = t2LEUpdate renamable $lr + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-unknown" + + ; Function Attrs: norecurse nounwind optsize + define dso_local arm_aapcscc void @copy(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { + entry: + %cmp4 = icmp eq i32 %N, 0 + %0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) + br i1 %0, label %while.body.preheader, label %while.end + + while.body.preheader: ; preds = %entry + br label %while.body + + while.body: ; preds = %while.body, %while.body.preheader + %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %while.body.preheader ] + %b.addr.05 = 
phi i16* [ %incdec.ptr, %while.body ], [ %b, %while.body.preheader ] + %1 = phi i32 [ %N, %while.body.preheader ], [ %3, %while.body ] + %incdec.ptr = getelementptr inbounds i16, i16* %b.addr.05, i32 1 + %2 = load i16, i16* %b.addr.05, align 2, !tbaa !3 + %incdec.ptr1 = getelementptr inbounds i16, i16* %a.addr.06, i32 1 + store i16 %2, i16* %a.addr.06, align 2, !tbaa !3 + %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1) + %4 = icmp ne i32 %3, 0 + br i1 %4, label %while.body, label %while.end + + while.end: ; preds = %while.body, %entry + ret void + } + + declare i1 @llvm.test.set.loop.iterations.i32(i32) #1 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 + + attributes #1 = { noduplicate nounwind } + attributes #2 = { nounwind } + + !llvm.module.flags = !{!0, !1} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"min_enum_size", i32 4} + !3 = !{!4, !4, i64 0} + !4 = !{!"short", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + +... 
+--- +name: copy +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + + frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $r7 = frame-setup tMOVr $sp, 14, $noreg + frame-setup CFI_INSTRUCTION def_cfa_register $r7 + t2WhileLoopStart $r2, %bb.3 + tB %bb.1, 14, $noreg + + bb.1.while.body.preheader: + successors: %bb.2(0x80000000) + + $lr = tMOVr killed $r2, 14, $noreg + + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + renamable $r2, renamable $r1 = t2LDRH_POST killed renamable $r1, 2, 14, $noreg :: (load 2 from 
%ir.b.addr.05, !tbaa !3) + early-clobber renamable $r0 = t2STRH_POST killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.a.addr.06, !tbaa !3) + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + + bb.3.while.end: + tPOP_RET 14, $noreg, def $r7, def $pc + +... Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir @@ -1,115 +0,0 @@ -# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s -# CHECK: $lr = tMOVr $r0, 13, $noreg -# CHECK: $lr = t2DLS killed $r0 -# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1 - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main" - - define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { - entry: - %scevgep = getelementptr i32, i32* %q, i32 -1 - %scevgep3 = getelementptr i32, i32* %p, i32 -1 - call void @llvm.set.loop.iterations.i32(i32 %n) - br label %while.body - - while.body: - %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] - %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] - %0 = phi i32 [ %n, %entry ], [ %2, %while.body ] - %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1 - %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 1 - %1 = load i32, i32* %scevgep2, align 4 - store i32 %1, i32* %scevgep6, align 4 - %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 - %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 - %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %3 = icmp ne i32 %2, 0 - br i1 %3, label %while.body, label %while.end - - while.end: - ret i32 0 - } - - declare void @llvm.set.loop.iterations.i32(i32) #0 - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, 
i32) #0 - declare void @llvm.stackprotector(i8*, i8**) #1 - - attributes #0 = { noduplicate nounwind } - attributes #1 = { nounwind } - -... ---- -name: do_copy -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } - - { reg: '$r1', virtual-reg: '' } - - { reg: '$r2', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 8 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.1(0x80000000) - liveins: $r0, $r1, $r2, $r7, $lr - - $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr - frame-setup CFI_INSTRUCTION def_cfa_offset 8 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r7, -8 - $lr = tMOVr $r0, 13, $noreg - t2DoLoopStart killed $r0 - renamable $r0 = t2SUBri killed renamable $r1, 4, 14, $noreg, $noreg - renamable $r1 = t2SUBri killed renamable $r2, 4, 14, $noreg, $noreg - - bb.1.while.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) - 
liveins: $lr, $r0, $r1 - - renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep2) - early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep6) - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1 - t2B %bb.2, 14, $noreg - - bb.2.while.end: - $r0 = t2MOVi 0, 14, $noreg, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit killed $r0 - -... Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll @@ -3,10 +3,14 @@ @g = common local_unnamed_addr global i32* null, align 4 ; CHECK-LABEL: do_with_i32_urem +; CHECK: entry: +; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n) +; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end + ; CHECK: while.body.preheader: -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n) ; CHECK-NEXT: br label %while.body +; CHECK: while.body: ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 @@ -38,10 +42,14 @@ } ; CHECK-LABEL: do_with_i32_srem +; CHECK: entry: +; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n) +; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end + ; CHECK: while.body.preheader: -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n) ; CHECK-NEXT: br label %while.body +; CHECK: while.body: ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) ; 
CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 @@ -73,10 +81,14 @@ } ; CHECK-LABEL: do_with_i32_udiv +; CHECK: entry: +; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n) +; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end + ; CHECK: while.body.preheader: -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n) ; CHECK-NEXT: br label %while.body +; CHECK: while.body: ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 @@ -108,10 +120,14 @@ } ; CHECK-LABEL: do_with_i32_sdiv +; CHECK: entry: +; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n) +; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end + ; CHECK: while.body.preheader: -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n) ; CHECK-NEXT: br label %while.body +; CHECK: while.body: ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 @@ -143,7 +159,7 @@ } ; CHECK-LABEL: do_with_i64_urem -; CHECK-NOT: llvm.set.loop.iterations +; CHECK-NOT: llvm.{{.*}}.loop.iterations ; CHECK-NOT: llvm.loop.decrement define i64 @do_with_i64_urem(i32 %n) { entry: @@ -172,7 +188,7 @@ } ; CHECK-LABEL: do_with_i64_srem -; CHECK-NOT: llvm.set.loop.iterations +; CHECK-NOT: llvm.{{.*}}.loop.iterations ; CHECK-NOT: llvm.loop.decrement define i64 @do_with_i64_srem(i32 %n) { entry: @@ -201,7 +217,7 @@ } ; CHECK-LABEL: do_with_i64_udiv -; CHECK-NOT: llvm.set.loop.iterations +; CHECK-NOT: llvm.{{.*}}.loop.iterations ; CHECK-NOT: llvm.loop.decrement define i64 @do_with_i64_udiv(i32 %n) { entry: @@ -230,7 +246,7 @@ } ; CHECK-LABEL: do_with_i64_sdiv -; CHECK-NOT: 
call void @llvm.set.loop.iterations +; CHECK-NOT: call void @llvm.{{.*}}.loop.iterations ; CHECK-NOT: call i32 @llvm.loop.decrement define i64 @do_with_i64_sdiv(i32 %n) { entry: Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll @@ -2,9 +2,13 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+soft-float -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT ; CHECK-LABEL: test_fptosi -; CHECK: while.body.lr.ph: +; CHECK-SOFT-NOT: call void @llvm.set.loop.iterations + +; CHECK: entry: ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 ; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 + +; CHECK: while.body.lr.ph: ; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) ; CHECK-FP-NEXT: br label %while.body @@ -13,8 +17,6 @@ ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 ; CHECK-FP: br i1 [[CMP]], label %while.body, label %cleanup.loopexit -; CHECK-SOFT-NOT: call void @llvm.set.loop.iterations - define void @test_fptosi(i32 %n, i32** %g, double** %d) { entry: %n.off = add i32 %n, -1 @@ -53,9 +55,10 @@ } ; CHECK-LABEL: test_fptoui -; CHECK-FP: while.body.lr.ph: +; CHECK: entry: ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 ; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 +; CHECK-FP: while.body.lr.ph: ; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) ; CHECK-FP-NEXT: br label %while.body @@ -104,10 +107,11 @@ } ; CHECK-LABEL: load_store_float +; CHECK: entry: +; CHECK: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 +; CHECK: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 ; CHECK: while.body.lr.ph: -; CHECK: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 -; CHECK: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 -; CHECK: call void 
@llvm.set.loop.iterations.i32(i32 [[COUNT]]) +; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) ; CHECK-NEXT: br label %while.body ; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %if.end4 ] @@ -152,12 +156,11 @@ } ; CHECK-LABEL: fp_add -; CHECK: while.body.lr.ph: - ; CHECK-SOFT-NOT: call void @llvm.set.loop.iterations - +; CHECK: entry: ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1 ; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1 +; CHECK: while.body.lr.ph: ; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) ; CHECK: br label %while.body Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir @@ -1,145 +0,0 @@ -# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s -# CHECK: for.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-unknown-unknown" - - ; Function Attrs: norecurse nounwind - define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { - entry: - %cmp8 = icmp eq i32 %N, 0 - br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader - - for.body.preheader: ; preds = %entry - %scevgep = getelementptr i32, i32* %a, i32 -1 - %scevgep4 = getelementptr i32, i32* %c, i32 -1 - %scevgep8 = getelementptr i32, i32* %b, i32 -1 - call void @llvm.set.loop.iterations.i32(i32 %N) - br label %for.body - - for.cond.cleanup: ; preds = %for.body, %entry - ret void - - for.body: ; preds = %for.body, %for.body.preheader - %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ] - %lsr.iv5 = phi i32* [ %scevgep4, 
%for.body.preheader ], [ %scevgep6, %for.body ] - %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ] - %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ] - %size = call i32 @llvm.arm.space(i32 4096, i32 undef) - %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 - %1 = load i32, i32* %scevgep11, align 4, !tbaa !3 - %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1 - %2 = load i32, i32* %scevgep7, align 4, !tbaa !3 - %mul = mul nsw i32 %2, %1 - %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1 - store i32 %mul, i32* %scevgep3, align 4, !tbaa !3 - %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1 - %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1 - %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1 - %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %4 = icmp ne i32 %3, 0 - br i1 %4, label %for.body, label %for.cond.cleanup - } - - declare i32 @llvm.arm.space(i32, i32) #1 - declare void @llvm.set.loop.iterations.i32(i32) #2 - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 - - attributes #1 = { nounwind } - attributes #2 = { noduplicate nounwind } - - !llvm.module.flags = !{!0, !1} - !llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, !"min_enum_size", i32 4} - !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"} - !3 = !{!4, !4, i64 0} - !4 = !{!"int", !5, i64 0} - !5 = !{!"omnipotent char", !6, i64 0} - !6 = !{!"Simple C/C++ TBAA"} - -... 
---- -name: massive -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: false -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } - - { reg: '$r1', virtual-reg: '' } - - { reg: '$r2', virtual-reg: '' } - - { reg: '$r3', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 8 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.1(0x80000000) - - frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 8 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r7, -8 - $r7 = frame-setup tMOVr $sp, 14, $noreg - frame-setup CFI_INSTRUCTION def_cfa_register $r7 - tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr - t2IT 0, 8, implicit-def $itstate - tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate - renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg - renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg - renamable 
$r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg - $lr = tMOVr $r3, 14, $noreg - t2DoLoopStart killed $r3 - - bb.1.for.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) - - dead renamable $r3 = SPACE 4096, undef renamable $r0 - renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3) - renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3) - renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg - early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3) - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1 - tB %bb.2, 14, $noreg - - bb.2.for.cond.cleanup: - tPOP_RET 14, $noreg, def $r7, def $pc - -... Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir @@ -1,160 +0,0 @@ -# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s -# CHECK: for.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-unknown-unknown" - - define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { - entry: - %cmp8 = icmp eq i32 %N, 0 - br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader - - for.body.preheader: ; preds = %entry - br label %for.body - - for.cond.cleanup: ; preds = %for.end, %entry - ret void - - for.body: ; preds = %for.body.preheader, %for.end - %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, 
%for.end ] - %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ] - %lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ] - %lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ] - %size = call i32 @llvm.arm.space(i32 3072, i32 undef) - %0 = load i32, i32* %lsr.iv4, align 4, !tbaa !3 - %1 = load i32, i32* %lsr.iv2, align 4, !tbaa !3 - %mul = mul nsw i32 %1, %0 - store i32 %mul, i32* %lsr.iv1, align 4, !tbaa !3 - %cmp = icmp ne i32 %0, 0 - br i1 %cmp, label %middle.block, label %for.end - - middle.block: ; preds = %for.body - %div = udiv i32 %1, %0 - store i32 %div, i32* %lsr.iv1, align 4, !tbaa !3 - %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef) - br label %for.end - - for.end: ; preds = %middle.block, %for.body - %lsr.iv.next = add i32 %lsr.iv, -1 - %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1 - %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1 - %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 - %exitcond = icmp eq i32 %lsr.iv.next, 0 - br i1 %exitcond, label %for.cond.cleanup, label %for.body - } - - declare i32 @llvm.arm.space(i32, i32) #1 - attributes #1 = { nounwind } - - !llvm.module.flags = !{!0, !1} - !llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, !"min_enum_size", i32 4} - !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"} - !3 = !{!4, !4, i64 0} - !4 = !{!"int", !5, i64 0} - !5 = !{!"omnipotent char", !6, i64 0} - !6 = !{!"Simple C/C++ TBAA"} - -... 
---- -name: size_limit -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: false -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } - - { reg: '$r1', virtual-reg: '' } - - { reg: '$r2', virtual-reg: '' } - - { reg: '$r3', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 16 - offsetAdjustment: -8 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.1(0x30000000), %bb.3(0x50000000) - - frame-setup tPUSH 14, $noreg, killed $r4, killed $r6, $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup 
CFI_INSTRUCTION def_cfa_offset 16 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r6, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 - $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg - frame-setup CFI_INSTRUCTION def_cfa $r7, 8 - tCBNZ $r3, %bb.3 - - bb.1.for.cond.cleanup: - tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc - - bb.2.for.end: - successors: %bb.1(0x04000000), %bb.3(0x7c000000) - - renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg - renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg - renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg - renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 1, 14, $noreg - tBcc %bb.1, 0, killed $cpsr - - bb.3.for.body: - successors: %bb.4(0x50000000), %bb.2(0x30000000) - - dead renamable $r12 = SPACE 3072, undef renamable $r0 - renamable $r12 = t2LDRi12 renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4, !tbaa !3) - renamable $lr = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2, !tbaa !3) - t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr - renamable $r4 = nsw t2MUL renamable $lr, renamable $r12, 14, $noreg - tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3) - t2Bcc %bb.2, 0, killed $cpsr - - bb.4.middle.block: - successors: %bb.2(0x80000000) - - renamable $r4 = t2UDIV killed renamable $lr, killed renamable $r12, 14, $noreg - tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3) - dead renamable $r4 = SPACE 1024, undef renamable $r0 - t2B %bb.2, 14, $noreg - -... 
Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir @@ -1,130 +0,0 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s - -# CHECK: while.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-arm-none-eabi" - - define i32 @skip_spill(i32 %n) #0 { - entry: - %cmp6 = icmp eq i32 %n, 0 - br i1 %cmp6, label %while.end, label %while.body.preheader - - while.body.preheader: ; preds = %entry - call void @llvm.set.loop.iterations.i32(i32 %n) - br label %while.body - - while.body: ; preds = %while.body, %while.body.preheader - %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] - %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] - %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() - %add = add nsw i32 %call, %res.07 - %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %2 = icmp ne i32 %1, 0 - br i1 %2, label %while.body, label %while.end - - while.end: ; preds = %while.body, %entry - %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ] - ret i32 %res.0.lcssa - } - - declare i32 @bar(...) local_unnamed_addr #0 - declare void @llvm.set.loop.iterations.i32(i32) #1 - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - - attributes #0 = { "target-features"="+mve.fp" } - attributes #1 = { noduplicate nounwind } - attributes #2 = { nounwind } - -... 
---- -name: skip_spill -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: false -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 16 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: true - hasCalls: true - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.4(0x30000000), %bb.1(0x50000000) - - frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 16 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION 
offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r5, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 - tCBZ $r0, %bb.4 - - bb.1.while.body.preheader: - successors: %bb.2(0x80000000) - - $lr = tMOVr $r0, 14, $noreg - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - t2DoLoopStart killed $r0 - - bb.2.while.body: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - - $r5 = tMOVr killed $lr, 14, $noreg - tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 - $lr = tMOVr killed $r5, 14, $noreg - renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.2 - tB %bb.3, 14, $noreg - - bb.3.while.end: - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - - bb.4: - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - -... 
Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir @@ -1,130 +0,0 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s - -# CHECK: while.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-arm-none-eabi" - - define i32 @skip_spill(i32 %n) #0 { - entry: - %cmp6 = icmp eq i32 %n, 0 - br i1 %cmp6, label %while.end, label %while.body.preheader - - while.body.preheader: ; preds = %entry - call void @llvm.set.loop.iterations.i32(i32 %n) - br label %while.body - - while.body: ; preds = %while.body, %while.body.preheader - %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] - %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ] - %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() - %add = add nsw i32 %call, %res.07 - %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %2 = icmp ne i32 %1, 0 - br i1 %2, label %while.body, label %while.end - - while.end: ; preds = %while.body, %entry - %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ] - ret i32 %res.0.lcssa - } - - declare i32 @bar(...) local_unnamed_addr #0 - declare void @llvm.set.loop.iterations.i32(i32) #1 - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - - attributes #0 = { "target-features"="+mve.fp" } - attributes #1 = { noduplicate nounwind } - attributes #2 = { nounwind } - -... 
---- -name: skip_spill -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: false -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 16 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: true - hasCalls: true - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.4(0x30000000), %bb.1(0x50000000) - - frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 16 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION 
offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r5, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 - tCBZ $r0, %bb.4 - - bb.1.while.body.preheader: - successors: %bb.2(0x80000000) - - $lr = tMOVr $r0, 14, $noreg - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - t2DoLoopStart killed $r0 - - bb.2.while.body: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - - $r5 = tMOVr killed $lr, 14, $noreg - tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 - $lr = tMOVr killed $r5, 14, $noreg - renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.2 - tB %bb.3, 14, $noreg - - bb.3.while.end: - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - - bb.4: - renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - $r0 = tMOVr killed $r4, 14, $noreg - tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - -... 
Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll @@ -3,7 +3,7 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=-lob -hardware-loops %s -S -o - | FileCheck %s --check-prefix=DISABLED ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC -; DISABLED-NOT: llvm.set.loop.iterations +; DISABLED-NOT: llvm.{{.*}}.loop.iterations ; DISABLED-NOT: llvm.loop.decrement @g = common local_unnamed_addr global i32* null, align 4 @@ -46,9 +46,12 @@ } ; CHECK-LABEL: do_inc1 +; CHECK: entry: +; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n) +; CHECK: br i1 [[TEST]], label %while.body.lr.ph, label %while.end + ; CHECK: while.body.lr.ph: -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n) -; CHECK-NEXT: br label %while.body +; CHECK: br label %while.body ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) @@ -56,12 +59,12 @@ ; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit ; CHECK-LLC-LABEL:do_inc1: -; CHECK-LLC: dls lr, +; CHECK-LLC: wls lr, {{.*}}, [[LOOP_EXIT:.[LBB_0-3]+]] ; CHECK-LLC-NOT: mov lr, ; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]: ; CHECK-LLC: le lr, [[LOOP_HEADER]] ; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9_]+]] -; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9_]+]]: +; CHECK-LLC: [[LOOP_EXIT]]: define i32 @do_inc1(i32 %n) { entry: @@ -91,26 +94,26 @@ } ; CHECK-LABEL: do_inc2 -; CHECK: while.body.lr.ph: +; CHECK: entry: ; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, -1 ; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[ROUND]], 1 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1 -; CHECK: call void @llvm.set.loop.iterations.i32(i32 
[[COUNT]]) -; CHECK-NEXT: br label %while.body -; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] -; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) -; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 -; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit +; CHECK: while.body.lr.ph: +; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) +; CHECK: br label %while.body +; CHECK: while.body: +; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] +; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) +; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 +; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit ; CHECK-LLC: do_inc2: ; CHECK-LLC-NOT: mov lr, -; CHECK-LLC: dls lr, +; CHECK-LLC: dls lr, {{.*}} ; CHECK-LLC-NOT: mov lr, ; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9._]+]]: ; CHECK-LLC: le lr, [[LOOP_HEADER]] -; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9._]+]] -; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9_]+]]: define i32 @do_inc2(i32 %n) { entry: @@ -141,15 +144,17 @@ ; CHECK-LABEL: do_dec2 -; CHECK: while.body.lr.ph: +; CHECK: entry: ; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, 1 ; CHECK: [[CMP:%[^ ]+]] = icmp slt i32 %n, 2 ; CHECK: [[SMIN:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 2 ; CHECK: [[SUB:%[^ ]+]] = sub i32 [[ROUND]], [[SMIN]] ; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[SUB]], 1 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1 + +; CHECK: while.body.lr.ph: ; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) -; CHECK-NEXT: br label %while.body +; CHECK: br label %while.body ; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ] ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1) @@ -158,12 +163,11 @@ ; CHECK-LLC: do_dec2 ; CHECK-LLC-NOT: mov lr, -; CHECK-LLC: 
dls lr, +; CHECK-LLC: dls lr, {{.*}} ; CHECK-LLC-NOT: mov lr, ; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]: ; CHECK-LLC: le lr, [[LOOP_HEADER]] ; CHECK-LLC-NOT: b . -; CHECK-LLC: @ %while.end define i32 @do_dec2(i32 %n) { entry: %cmp6 = icmp sgt i32 %n, 0 Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir @@ -1,155 +0,0 @@ -# RUN: llc -mtriple=armv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s -# CHECK: entry: -# CHECK: $lr = t2DLS -# CHECK: for.body: -# CHECK: $lr = t2LEUpdate renamable $lr - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-unknown-unknown" - - ; Function Attrs: norecurse nounwind - define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { - entry: - %cmp8 = icmp eq i32 %N, 0 - br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader - - for.body.preheader: ; preds = %entry - %scevgep = getelementptr i32, i32* %a, i32 -1 - %scevgep4 = getelementptr i32, i32* %c, i32 -1 - %scevgep8 = getelementptr i32, i32* %b, i32 -1 - call void @llvm.set.loop.iterations.i32(i32 %N) - br label %for.body - - for.cond.cleanup: ; preds = %for.body, %entry - ret void - - for.body: ; preds = %for.body, %for.body.preheader - %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ] - %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ] - %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ] - %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ] - %size = call i32 @llvm.arm.space(i32 4072, i32 undef) - %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 - %1 = load i32, i32* %scevgep11, align 4, 
!tbaa !3 - %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1 - %2 = load i32, i32* %scevgep7, align 4, !tbaa !3 - %mul = mul nsw i32 %2, %1 - %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1 - store i32 %mul, i32* %scevgep3, align 4, !tbaa !3 - %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1 - %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1 - %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1 - %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %4 = icmp ne i32 %3, 0 - br i1 %4, label %for.body, label %for.cond.cleanup - } - - ; Function Attrs: nounwind - declare i32 @llvm.arm.space(i32, i32) #1 - - ; Function Attrs: noduplicate nounwind - declare void @llvm.set.loop.iterations.i32(i32) #2 - - ; Function Attrs: noduplicate nounwind - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 - - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #1 - - attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } - attributes #1 = { nounwind } - attributes #2 = { noduplicate nounwind } - - !llvm.module.flags = !{!0, !1} - !llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, !"min_enum_size", i32 4} - !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git 
a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"} - !3 = !{!4, !4, i64 0} - !4 = !{!"int", !5, i64 0} - !5 = !{!"omnipotent char", !6, i64 0} - !6 = !{!"Simple C/C++ TBAA"} - -... ---- -name: size_limit -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: false -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } - - { reg: '$r1', virtual-reg: '' } - - { reg: '$r2', virtual-reg: '' } - - { reg: '$r3', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 8 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.1(0x80000000) - - frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 8 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r7, -8 - $r7 = frame-setup tMOVr $sp, 14, $noreg - frame-setup CFI_INSTRUCTION def_cfa_register $r7 - tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr - t2IT 0, 8, 
implicit-def $itstate - tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate - renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg - renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg - renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg - $lr = tMOVr $r3, 14, $noreg - t2DoLoopStart killed $r3 - - bb.1.for.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) - - dead renamable $r3 = SPACE 4072, undef renamable $r0 - renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3) - renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3) - renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg - early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3) - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1 - tB %bb.2, 14, $noreg - - bb.2.for.cond.cleanup: - tPOP_RET 14, $noreg, def $r7, def $pc - -... 
Index: llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll @@ -109,6 +109,35 @@ ret i32 0 } +; CHECK-LABEL: pre_existing_test_set +; CHECK: call i1 @llvm.test.set.loop.iterations +; CHECK-NOT: llvm.set{{.*}}.loop.iterations +; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) +; CHECK-NOT: call i32 @llvm.loop.decrement.reg +define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { +entry: + %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n) + br i1 %guard, label %while.preheader, label %while.end + +while.preheader: + br label %while.body + +while.body: ; preds = %while.body, %entry + %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ] + %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ] + %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ] + %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1 + %1 = load i32, i32* %q.addr.05, align 4 + %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1 + store i32 %1, i32* %p.addr.04, align 4 + %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) + %3 = icmp ne i32 %2, 0 + br i1 %3, label %while.body, label %while.end + +while.end: ; preds = %while.body + ret i32 0 +} + ; CHECK-LABEL: pre_existing_inner ; CHECK-NOT: llvm.set.loop.iterations ; CHECK: while.cond1.preheader.us: @@ -223,14 +252,16 @@ } ; CHECK-LABEL: search +; CHECK: entry: +; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) +; CHECK: br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup ; CHECK: for.body.preheader: -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N) -; CHECK: br label %for.body +; CHECK: br label %for.body ; CHECK: for.body: ; CHECK: for.inc: -; CHECK: 
[[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32 -; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 -; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup +; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32 +; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0 +; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup define i32 @search(i8* nocapture readonly %c, i32 %N) { entry: %cmp11 = icmp eq i32 %N, 0 @@ -276,16 +307,16 @@ ; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32( ; TODO: We should be able to support the unrolled loop body. -; CHECK-UNROLL-LABEL: unroll_inc_int: +; CHECK-UNROLL-LABEL: unroll_inc_int ; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader ; CHECK-UNROLL-NOT: dls ; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body ; CHECK-UNROLL-NOT: le lr, [[LOOP]] ; CHECK-UNROLL: bne [[LOOP]] -; CHECK-UNROLL: %for.body.epil.preheader -; CHECK-UNROLL: dls -; CHECK-UNROLL: %for.body.epil -; CHECK-UNROLL: le +; CHECK-UNROLL: wls lr, lr, [[EXIT:.LBB[0-9_]+]] +; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]: +; CHECK-UNROLL: le lr, [[EPIL]] +; CHECK-UNROLL-NEXT: [[EXIT]] define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: @@ -310,24 +341,27 @@ } ; CHECK-LABEL: unroll_inc_unsigned -; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N) +; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N) ; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32( ; CHECK-LLC-LABEL: unroll_inc_unsigned: -; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]] -; CHECK-LLC: le lr +; CHECK-LLC: wls lr, r3, [[EXIT:.LBB[0-9_]+]] +; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]: +; CHECK-LLC: le lr, [[HEADER]] +; CHECK-LLC-NEXT: [[EXIT]]: ; TODO: We should be able to support the unrolled loop body. 
-; CHECK-UNROLL-LABEL: unroll_inc_unsigned: +; CHECK-UNROLL-LABEL: unroll_inc_unsigned ; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader ; CHECK-UNROLL-NOT: dls ; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body ; CHECK-UNROLL-NOT: le lr, [[LOOP]] ; CHECK-UNROLL: bne [[LOOP]] -; CHECK-UNROLL: %for.body.epil.preheader -; CHECK-UNROLL: dls -; CHECK-UNROLL: %for.body.epil -; CHECK-UNROLL: le +; CHECK-UNROLL: wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]] +; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]: +; CHECK-UNROLL: le lr, [[EPIL]] +; CHECK-UNROLL: [[EPIL_EXIT]]: +; CHECK-UNROLL: pop define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: %cmp8 = icmp eq i32 %N, 0 @@ -357,15 +391,21 @@ ; TODO: An unnecessary register is being held to hold COUNT, lr should just ; be used instead. ; CHECK-LLC-LABEL: unroll_dec_int: -; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]] -; CHECK-LLC: subs [[COUNT]], #1 -; CHECK-LLC: le lr - -; CHECK-UNROLL-LABEL: unroll_dec_int -; CHECK-UNROLL: dls lr -; CHECK-UNROLL: le lr -; CHECK-UNROLL: dls lr -; CHECK-UNROLL: le lr +; CHECK-LLC: dls lr, r3 +; CHECK-LLC-NOT: mov lr, r3 +; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]: +; CHECK-LLC: le lr, [[HEADER]] + +; CHECK-UNROLL-LABEL: unroll_dec_int: +; CHECK-UNROLL: wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]] +; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]: +; CHECK-UNROLL: le lr, [[PROLOGUE]] +; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]: +; CHECK-UNROLL: dls lr, lr +; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]: +; CHECK-UNROLL: le lr, [[BODY]] +; CHECK-UNROLL-NOT: b +; CHECK-UNROLL: pop define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: %cmp8 = icmp sgt i32 %N, 0 @@ -389,5 +429,6 @@ } declare void @llvm.set.loop.iterations.i32(i32) #0 +declare i1 @llvm.test.set.loop.iterations.i32(i32) #0 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 Index: 
llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir =================================================================== --- llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir +++ llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir @@ -1,198 +0,0 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops -o - -# CHECK: bb.1.for.body.preheader: -# CHECK: $lr = t2DLS -# CHECK-NOT: t2LoopDec -# CHECK: bb.6.for.inc: -# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-unknown-unknown" - - ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcscc i32 @search(i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { - entry: - %cmp11 = icmp eq i32 %N, 0 - br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader - - for.body.preheader: - call void @llvm.set.loop.iterations.i32(i32 %N) - br label %for.body - - for.cond.cleanup: - %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ] - %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ] - %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa - ret i32 %sub - - for.body: - %lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ] - %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ] - %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ] - %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ] - %1 = load i8, i8* %lsr.iv1, align 1 - %2 = zext i8 %1 to i32 - switch i32 %2, label %for.inc [ - i32 108, label %sw.bb - i32 111, label %sw.bb - i32 112, label %sw.bb - i32 32, label %sw.bb1 - ] - - sw.bb: - %inc = add nsw i32 %found.012, 1 - br label %for.inc - - sw.bb1: - %inc2 = add nsw i32 %spaces.013, 1 - br label %for.inc - - for.inc: - %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ] - %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, 
%sw.bb1 ], [ %spaces.013, %sw.bb ] - %scevgep = getelementptr i8, i8* %lsr.iv1, i32 1 - %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) - %4 = icmp ne i32 %3, 0 - br i1 %4, label %for.body, label %for.cond.cleanup - } - - declare void @llvm.set.loop.iterations.i32(i32) #1 - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - declare void @llvm.stackprotector(i8*, i8**) #2 - - attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } - attributes #1 = { noduplicate nounwind } - attributes #2 = { nounwind } - -... 
---- -name: search -alignment: 1 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } - - { reg: '$r1', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 16 - offsetAdjustment: -8 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - successors: %bb.1(0x30000000), %bb.3(0x50000000) - liveins: $r0, $r1, $r4, $r6, $lr - - $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r6, $r7, killed $lr - frame-setup CFI_INSTRUCTION def_cfa_offset 16 - frame-setup CFI_INSTRUCTION 
offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r7, -8 - frame-setup CFI_INSTRUCTION offset $r6, -12 - frame-setup CFI_INSTRUCTION offset $r4, -16 - $r7 = frame-setup t2ADDri $sp, 8, 14, $noreg, $noreg - frame-setup CFI_INSTRUCTION def_cfa $r7, 8 - t2CMPri $r1, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.1, 0, killed $cpsr - - bb.3.for.body.preheader: - successors: %bb.4(0x80000000) - liveins: $r0, $r1 - - $lr = tMOVr $r1, 14, $noreg - t2DoLoopStart killed $r1 - renamable $r1 = t2MOVi 0, 14, $noreg, $noreg - renamable $r12 = t2MOVi 1, 14, $noreg, $noreg - renamable $r2 = t2MOVi 0, 14, $noreg, $noreg - - bb.4.for.body: - successors: %bb.5(0x26666665), %bb.6(0x5999999b) - liveins: $lr, $r0, $r1, $r2, $r12 - - renamable $r3 = t2LDRBi12 renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1) - renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg - renamable $lr = t2LoopDec killed renamable $lr, 1 - t2CMPri renamable $r4, 4, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.5, 8, killed $cpsr - - bb.6.for.body: - successors: %bb.7(0x6db6db6e), %bb.5(0x12492492) - liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r12 - - renamable $r4 = t2LSLrr renamable $r12, killed renamable $r4, 14, $noreg, $noreg - t2TSTri killed renamable $r4, 25, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.5, 0, killed $cpsr - - bb.7.sw.bb: - successors: %bb.8(0x80000000) - liveins: $lr, $r0, $r1, $r2, $r12 - - renamable $r2 = nsw t2ADDri killed renamable $r2, 1, 14, $noreg, $noreg - t2B %bb.8, 14, $noreg - - bb.5.for.body: - successors: %bb.8(0x80000000) - liveins: $lr, $r0, $r1, $r2, $r3, $r12 - - t2CMPri killed renamable $r3, 32, 14, $noreg, implicit-def $cpsr - BUNDLE implicit-def dead $itstate, implicit-def $r1, implicit killed $r1, implicit killed $cpsr { - t2IT 0, 8, implicit-def $itstate - renamable $r1 = nsw t2ADDri killed renamable $r1, 1, 0, killed $cpsr, $noreg, implicit $r1, implicit internal killed $itstate - } - - bb.8.for.inc: - successors: %bb.4(0x7c000000), %bb.2(0x04000000) - 
liveins: $lr, $r0, $r1, $r2, $r12 - - renamable $r0 = t2ADDri killed renamable $r0, 1, 14, $noreg, $noreg - t2LoopEnd renamable $lr, %bb.4 - t2B %bb.2, 14, $noreg - - bb.2.for.cond.cleanup: - liveins: $r1, $r2 - - renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0 - - bb.1: - renamable $r2 = t2MOVi 0, 14, $noreg, $noreg - renamable $r1 = t2MOVi 0, 14, $noreg, $noreg - renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0 - -...