Index: llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
===================================================================
--- llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -130,6 +130,12 @@
   /// reaching def instuction of PhysReg that reaches MI.
   int getClearance(MachineInstr *MI, MCPhysReg PhysReg);
 
+  /// For the given block, collect the instructions that use the live-in
+  /// value of the provided register. Return whether the value is still
+  /// live on exit.
+  bool getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
+                     SmallPtrSetImpl<MachineInstr*> &Uses);
+
   /// Provides the uses, in the same block as MI, of register that MI defines.
   /// This does not consider live-outs.
   void getReachingLocalUses(MachineInstr *MI, int PhysReg,
Index: llvm/lib/CodeGen/ReachingDefAnalysis.cpp
===================================================================
--- llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -330,3 +330,25 @@
     Pos = Use;
   }
 }
+
+// Gather into Uses every instruction in MBB that reads PhysReg before the
+// block redefines it. Returns false as soon as a use with a non-negative
+// reaching def is seen (presumably a def local to MBB); otherwise returns
+// whether the value is still live out of the block.
+bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
+                                        SmallPtrSetImpl<MachineInstr*> &Uses) {
+  for (auto &MI : *MBB) {
+    for (auto &MO : MI.operands()) {
+      if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg)
+        continue;
+      if (getReachingDef(&MI, PhysReg) >= 0)
+        return false;
+      Uses.insert(&MI);
+    }
+  }
+  // An empty block has no last instruction, so back() must not be called;
+  // with nothing to redefine PhysReg the live-in value simply passes through.
+  if (MBB->empty())
+    return true;
+  return isReachingDefLiveOut(&MBB->back(), PhysReg);
+}
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -664,6 +664,12 @@
   return true;
 }
 
+/// Returns true if Opc is one of the register-to-register move opcodes
+/// (MOVr, tMOVr or t2MOVr).
+static inline bool isMovRegOpcode(int Opc) {
+  return Opc == ARM::MOVr || Opc == ARM::tMOVr || Opc == ARM::t2MOVr;
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.
Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -122,6 +122,7 @@
   struct LowOverheadLoop {
 
     MachineLoop *ML = nullptr;
+    const TargetRegisterInfo *TRI = nullptr;
     MachineFunction *MF = nullptr;
     MachineInstr *InsertPt = nullptr;
     MachineInstr *Start = nullptr;
@@ -134,10 +135,18 @@
     bool Revert = false;
     bool CannotTailPredicate = false;
 
-    LowOverheadLoop(MachineLoop *ML) : ML(ML) {
+    LowOverheadLoop(MachineLoop *ML, const TargetRegisterInfo *TRI) :
+      ML(ML), TRI(TRI) {
       MF = ML->getHeader()->getParent();
     }
 
+    bool IsTailPredicationLegal() const {
+      // For now, let's keep things really simple and only support a single
+      // block for tail predication.
+      return !Revert && FoundAllComponents() && VCTP &&
+             !CannotTailPredicate && ML->getNumBlocks() == 1;
+    }
+
     // If this is an MVE instruction, check that we know how to use tail
     // predication with it. Record VPT blocks and return whether the
     // instruction is valid for tail predication.
@@ -147,12 +156,9 @@
       CannotTailPredicate = !ValidateMVEInst(MI);
     }
 
-    bool IsTailPredicationLegal() const {
-      // For now, let's keep things really simple and only support a single
-      // block for tail predication.
-      return !Revert && FoundAllComponents() && VCTP &&
-             !CannotTailPredicate && ML->getNumBlocks() == 1;
-    }
+    // Check whether any of MI's defs are live-out, and if so, whether they're
+    // predicated by the VCTP.
+    bool ValidateLiveOuts(MachineInstr *MI, ReachingDefAnalysis *RDA);
 
     bool ValidateTailPredicate(MachineInstr *StartInsertPt,
                                ReachingDefAnalysis *RDA,
@@ -323,8 +329,97 @@
   return true;
 }
 
+/// Check that every MVE Q-register defined by \p MI that is live in the
+/// loop's exit blocks is only used there by a VPSEL whose predicate comes
+/// from a vctp equivalent to the one in the loop body. Returns true when
+/// \p MI is not an MVE instruction or defines no such live-out register.
+bool LowOverheadLoop::ValidateLiveOuts(MachineInstr *MI,
+                                       ReachingDefAnalysis *RDA) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  uint64_t Flags = MCID.TSFlags;
+  if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
+    return true;
+
+  // Collect the Q-registers that MI defines and that are live in the exit
+  // blocks.
+  const TargetRegisterClass *QPRs = TRI->getRegClass(ARM::MQPRRegClassID);
+  SmallVector<unsigned, 4> LiveOuts;
+  for (auto &MO : MI->operands()) {
+    if (!MO.isReg() || MO.getReg() == 0 || !QPRs->contains(MO.getReg()))
+      continue;
+    if (MO.isDef() && isRegLiveInExitBlocks(ML, MO.getReg()))
+      LiveOuts.push_back(MO.getReg());
+  }
+
+  if (LiveOuts.empty())
+    return true;
+
+  // Without a vctp there is nothing to compare the exit predication against.
+  if (!VCTP)
+    return false;
+
+  // Return whether Pred is performing the same calculation as the vctp
+  // within the loop body.
+  auto IsEquivalentVCTP = [](MachineInstr *Pred, MachineInstr *VCTP,
+                             ReachingDefAnalysis *RDA) {
+    if (Pred->getOpcode() != VCTP->getOpcode())
+      return false;
+
+    MachineBasicBlock *LoopBody = VCTP->getParent();
+    unsigned ElemCountReg = VCTP->getOperand(1).getReg();
+    unsigned Reg = Pred->getOperand(1).getReg();
+
+    // Only handled when Pred's operand has no non-negative reaching def
+    // (presumably: it is not defined in Pred's own block) and the loop body
+    // provides a local live-out def of that register.
+    if (RDA->getReachingDef(Pred, Reg) < 0) {
+      if (auto *Def = RDA->getLocalLiveOutMIDef(LoopBody, Reg)) {
+        // Same register as the vctp, with the def that reaches the vctp.
+        if (Reg == ElemCountReg &&
+            Def == RDA->getReachingMIDef(VCTP, Reg))
+          return true;
+        // Or a register move from the vctp's element count register.
+        if (isMovRegOpcode(Def->getOpcode()) &&
+            Def->getOperand(1).getReg() == ElemCountReg &&
+            RDA->hasSameReachingDef(Def, VCTP, ElemCountReg))
+          return true;
+      }
+    }
+    return false;
+  };
+
+  LLVM_DEBUG(dbgs() << "ARM Loops: Inspecting loop live-outs.\n");
+  SmallVector<MachineBasicBlock *, 2> ExitBlocks;
+  ML->getExitBlocks(ExitBlocks);
+
+  // Any live-out values should be somehow predicated upon a vctp that is
+  // equivalent to the predication happening within the loop. Otherwise,
+  // when we perform tail-predication, we may be predicating instructions
+  // that should not be predicated.
+  for (auto *MBB : ExitBlocks) {
+    for (auto Reg : LiveOuts) {
+      SmallPtrSet<MachineInstr *, 2> Uses;
+      RDA->getLiveInUses(MBB, Reg, Uses);
+
+      for (auto *Use : Uses) {
+        LLVM_DEBUG(dbgs() << "ARM Loops: Live out use: " << *Use);
+        if (Use->getOpcode() != ARM::MVE_VPSEL)
+          return false;
+
+        unsigned VPRIdx = llvm::findFirstVPTPredOperandIdx(*Use) + 1;
+        auto *Pred = RDA->getReachingMIDef(Use,
+                                           Use->getOperand(VPRIdx).getReg());
+        if (!Pred || (!IsEquivalentVCTP(Pred, VCTP, RDA)))
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
 bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
-    ReachingDefAnalysis *RDA, MachineLoopInfo *MLI) {
+                                            ReachingDefAnalysis *RDA,
+                                            MachineLoopInfo *MLI) {
   assert(VCTP && "VCTP instruction expected but is not set");
   // All predication within the loop should be based on vctp. If the block
   // isn't predicated on entry, check whether the vctp is within the block
@@ -419,6 +514,11 @@
     MBB = *MBB->pred_begin();
   }
 
+  // Now check that any live-outs are handled appropriately.
+  for (auto &MI : *ML->getTopBlock())
+    if (!ValidateLiveOuts(&MI, RDA))
+      return false;
+
   LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication.\n");
   return true;
 }
@@ -608,7 +708,7 @@
     return nullptr;
   };
 
-  LowOverheadLoop LoLoop(ML);
+  LowOverheadLoop LoLoop(ML, TRI);
   // Search the preheader for the start intrinsic.
   // FIXME: I don't see why we shouldn't be supporting multiple predecessors
   // with potentially multiple set.loop.iterations, so we need to enable this.
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir @@ -0,0 +1,187 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s + +# Test that the scalar register that aliases a Q reg prevents the tail +# predication. + +--- | + define dso_local i32 @no_vpsel_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + entry: + %cmp9 = icmp eq i32 %N, 0 + %tmp = add i32 %N, 3 + %tmp1 = lshr i32 %tmp, 2 + %tmp2 = shl nuw i32 %tmp1, 2 + %tmp3 = add i32 %tmp2, -4 + %tmp4 = lshr i32 %tmp3, 2 + %tmp5 = add nuw nsw i32 %tmp4, 1 + br i1 %cmp9, label %for.cond.cleanup, label %vector.ph + + vector.ph: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %tmp5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ] + %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] + %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] + %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) + %tmp9 = sub i32 %tmp7, 4 + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %tmp11 = sext <4 x i16> 
%wide.masked.load14 to <4 x i32> + %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 + %tmp13 = add <4 x i32> %tmp12, %vec.phi + %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) + %tmp15 = icmp ne i32 %tmp14, 0 + %lsr.iv.next = add nsw i32 %lsr.iv1, -1 + br i1 %tmp15, label %vector.body, label %middle.block + + middle.block: ; preds = %vector.body + %tmp16 = extractelement <4 x i32> %tmp13, i32 3 + br label %for.cond.cleanup + + for.cond.cleanup: ; preds = %middle.block, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ] + ret i32 %res.0.lcssa + } + declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) + declare void @llvm.set.loop.iterations.i32(i32) + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) + declare <4 x i1> @llvm.arm.mve.vctp32(i32) + +... +--- +name: no_vpsel_liveout +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', 
type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: no_vpsel_liveout + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $lr, $r7 + ; CHECK: tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + ; CHECK: $lr = t2DLS renamable $r12 + ; CHECK: $r3 = tMOVr killed $r12, 14, $noreg + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, 
killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r3, 14, $noreg + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.middle.block: + ; CHECK: liveins: $q0 + ; CHECK: $r0 = VMOVRS killed $s3, 14, $noreg, implicit $q0 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 4, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + t2DoLoopStart renamable $r12 + $r3 = tMOVr killed $r12, 14, $noreg + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + MVE_VPST 4, implicit $vpr + renamable $r0, 
renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + $lr = tMOVr $r3, 14, $noreg + renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.middle.block: + liveins: $q0 + + $r0 = VMOVRS killed $s3, 14, $noreg, implicit $q0 + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir @@ -0,0 +1,184 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s +--- | + define dso_local i32 @no_vpsel_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + entry: + %cmp9 = icmp eq i32 %N, 0 + %tmp = add i32 %N, 3 + %tmp1 = lshr i32 %tmp, 2 + %tmp2 = shl nuw i32 %tmp1, 2 + %tmp3 = add i32 %tmp2, -4 + %tmp4 = lshr i32 %tmp3, 2 + %tmp5 = add nuw nsw i32 %tmp4, 1 + br i1 %cmp9, label %for.cond.cleanup, label %vector.ph + + vector.ph: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %tmp5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph 
] + %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] + %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] + %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) + %tmp9 = sub i32 %tmp7, 4 + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> + %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 + %tmp13 = add <4 x i32> %tmp12, %vec.phi + %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) + %tmp15 = icmp ne i32 %tmp14, 0 + %lsr.iv.next = add nsw i32 %lsr.iv1, -1 + br i1 %tmp15, label %vector.body, label %middle.block + + middle.block: ; preds = %vector.body + %tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp13) + br label %for.cond.cleanup + + for.cond.cleanup: ; preds = %middle.block, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ] + ret i32 %res.0.lcssa + } + declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2 + declare void @llvm.set.loop.iterations.i32(i32) #3 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 + +... 
+--- +name: no_vpsel_liveout +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: no_vpsel_liveout + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $lr, $r7 + ; CHECK: tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, 
-8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + ; CHECK: $lr = t2DLS renamable $r12 + ; CHECK: $r3 = tMOVr killed $r12, 14, $noreg + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r3, 14, $noreg + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.middle.block: + ; CHECK: liveins: $q0 + ; CHECK: renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 4, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0, 
$cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + t2DoLoopStart renamable $r12 + $r3 = tMOVr killed $r12, 14, $noreg + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + MVE_VPST 4, implicit $vpr + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + $lr = tMOVr $r3, 14, $noreg + renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.middle.block: + liveins: $q0 + + renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + 
+... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir @@ -0,0 +1,199 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +# This example is actually equivalent as there's a sub in the loop, which is +# then used by the add in the exit - making the vctp operands equivalent. + +--- | + define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + entry: + %cmp9 = icmp eq i32 %N, 0 + %0 = add i32 %N, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp9, label %for.cond.cleanup, label %vector.ph + + vector.ph: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %5, %vector.ph ] + %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] + %6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ] + %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) + %8 = sub i32 %6, 4 + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef) + %9 = sext <4 x i16> %wide.masked.load to <4 x i32> + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef) + %10 = sext <4 x i16> 
%wide.masked.load14 to <4 x i32> + %11 = mul nsw <4 x i32> %10, %9 + %12 = add <4 x i32> %11, %vec.phi + %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) + %14 = icmp ne i32 %13, 0 + %lsr.iv.next = add nsw i32 %lsr.iv1, -1 + br i1 %14, label %vector.body, label %middle.block + + middle.block: ; preds = %vector.body + %15 = add i32 %8, 4 + %16 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %15) + %17 = select <4 x i1> %16, <4 x i32> %12, <4 x i32> %vec.phi + %18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %17) + br label %for.cond.cleanup + + for.cond.cleanup: ; preds = %middle.block, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %18, %middle.block ] + ret i32 %res.0.lcssa + } + declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) + declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) + declare void @llvm.set.loop.iterations.i32(i32) + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) + declare <4 x i1> @llvm.arm.mve.vctp32(i32) + +... 
+--- +name: wrong_vctp_liveout +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: wrong_vctp_liveout + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $lr, $r7 + ; CHECK: tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset 
$r7, -8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + ; CHECK: $lr = t2DLS renamable $r12 + ; CHECK: $r3 = tMOVr killed $r12, 14, $noreg + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + ; CHECK: $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r3, 14, $noreg + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.middle.block: + ; CHECK: liveins: $q0, $q1, $r2 + ; CHECK: renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $vpr = MVE_VCTP32 killed renamable $r0, 0, $noreg + ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr + ; CHECK: renamable $r0 = 
MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 4, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 + renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + t2DoLoopStart renamable $r12 + $r3 = tMOVr killed $r12, 14, $noreg + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $q1, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 + MVE_VPST 4, implicit $vpr + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + $lr = tMOVr $r3, 14, $noreg + renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + renamable $q1 = 
MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.middle.block: + liveins: $q0, $q1, $r2 + + renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14, $noreg + renamable $vpr = MVE_VCTP32 killed renamable $r0, 0, $noreg + renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr + renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir @@ -0,0 +1,210 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +# I think this should be equivalent, but the calculation in the middle block +# is too complex to process for now. 
+ +--- | + define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + entry: + %cmp9 = icmp eq i32 %N, 0 + %tmp = add i32 %N, 3 + %tmp1 = lshr i32 %tmp, 2 + %tmp2 = shl nuw i32 %tmp1, 2 + %tmp3 = add i32 %tmp2, -4 + %tmp4 = lshr i32 %tmp3, 2 + %tmp5 = add nuw nsw i32 %tmp4, 1 + br i1 %cmp9, label %for.cond.cleanup, label %vector.ph + + vector.ph: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %tmp5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ] + %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] + %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] + %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) + %tmp9 = sub i32 %tmp7, 4 + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> + %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 + %tmp13 = add <4 x i32> %tmp12, %vec.phi + %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) + %tmp15 = icmp ne i32 %tmp14, 0 + %lsr.iv.next = add nsw i32 %lsr.iv1, -1 + br i1 %tmp15, label %vector.body, label %middle.block + + middle.block: ; preds = %vector.body + %0 = add i32 %tmp9, 4 + %insert.idx = insertelement <4 x 
i32> undef, i32 %0, i32 0 + %idx.splat = shufflevector <4 x i32> %insert.idx, <4 x i32> undef, <4 x i32> zeroinitializer + %n.minusone = add i32 %N, -1 + %insert.n = insertelement <4 x i32> undef, i32 %n.minusone, i32 0 + %n.splat = shufflevector <4 x i32> %insert.n, <4 x i32> undef, <4 x i32> zeroinitializer + %tmp16 = icmp ult <4 x i32> %idx.splat, %n.splat + %tmp17 = select <4 x i1> %tmp16, <4 x i32> %tmp13, <4 x i32> %vec.phi + %tmp18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp17) + br label %for.cond.cleanup + + for.cond.cleanup: ; preds = %middle.block, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp18, %middle.block ] + ret i32 %res.0.lcssa + } + declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2 + declare void @llvm.set.loop.iterations.i32(i32) #3 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 + +... 
+--- +name: wrong_vctp_liveout +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: wrong_vctp_liveout + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $lr, $r7 + ; CHECK: tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset 
$r7, -8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + ; CHECK: $r12 = tMOVr killed $r3, 14, $noreg + ; CHECK: $r3 = tMOVr $r2, 14, $noreg + ; CHECK: $lr = t2DLS renamable $r3 + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3, $r12 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + ; CHECK: $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r12, 14, $noreg + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r12 = nsw t2SUBri killed $r12, 1, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg + ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.middle.block: + ; CHECK: liveins: $q0, $q1, $r2, $r3 + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 1, 14, $noreg + ; CHECK: renamable $q2 = MVE_VDUP32 killed renamable $r0, 0, $noreg, undef renamable $q2 + ; CHECK: renamable $r0, dead $cpsr = tADDi3 killed renamable $r3, 4, 
14, $noreg + ; CHECK: renamable $vpr = MVE_VCMPu32r killed renamable $q2, killed renamable $r0, 8, 0, $noreg + ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 4, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 + renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + t2DoLoopStart renamable $r3 + $r12 = tMOVr killed $r3, 14, $noreg + $r3 = tMOVr $r2, 14, $noreg + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $q1, $r0, $r1, $r2, $r3, $r12 + + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 + MVE_VPST 4, implicit $vpr + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + $lr = tMOVr $r12, 14, $noreg 
+ renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $r12 = nsw t2SUBri killed $r12, 1, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg + renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.middle.block: + liveins: $q0, $q1, $r2, $r3 + + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 1, 14, $noreg + renamable $q2 = MVE_VDUP32 killed renamable $r0, 0, $noreg, undef renamable $q2 + renamable $r0, dead $cpsr = tADDi3 killed renamable $r3, 4, 14, $noreg + renamable $vpr = MVE_VCMPu32r killed renamable $q2, killed renamable $r0, 8, 0, $noreg + renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr + renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir @@ -0,0 +1,194 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +# The VCTP uses r2, which is redefined in the loop. 
+ +--- | + define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + entry: + %cmp9 = icmp eq i32 %N, 0 + %0 = add i32 %N, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp9, label %for.cond.cleanup, label %vector.ph + + vector.ph: ; preds = %entry + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %5, %vector.ph ] + %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] + %6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ] + %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) + %8 = sub i32 %6, 4 + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef) + %9 = sext <4 x i16> %wide.masked.load to <4 x i32> + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef) + %10 = sext <4 x i16> %wide.masked.load14 to <4 x i32> + %11 = mul nsw <4 x i32> %10, %9 + %12 = add <4 x i32> %11, %vec.phi + %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) + %14 = icmp ne i32 %13, 0 + %lsr.iv.next = add nsw i32 %lsr.iv1, -1 + br i1 %14, label %vector.body, label %middle.block + + middle.block: ; preds = %vector.body + %15 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8) + %16 = select <4 x i1> %15, <4 x i32> %12, <4 x i32> %vec.phi + %17 = call i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %16) + br label %for.cond.cleanup + + for.cond.cleanup: ; preds = %middle.block, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %17, %middle.block ] + ret i32 %res.0.lcssa + } + declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2 + declare void @llvm.set.loop.iterations.i32(i32) #3 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 +... +--- +name: wrong_vctp_liveout +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: wrong_vctp_liveout + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; 
CHECK: liveins: $r0, $r1, $r2, $lr, $r7 + ; CHECK: tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + ; CHECK: $lr = t2DLS renamable $r12 + ; CHECK: $r3 = tMOVr killed $r12, 14, $noreg + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + ; CHECK: $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: $lr = tMOVr $r3, 14, $noreg + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + ; CHECK: renamable $r2, 
dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.middle.block: + ; CHECK: liveins: $q0, $q1, $r2 + ; CHECK: renamable $vpr = MVE_VCTP32 killed renamable $r2, 0, $noreg + ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2IT 0, 4, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg + renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 + renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + t2DoLoopStart renamable $r12 + $r3 = tMOVr killed $r12, 14, $noreg + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $q1, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg + $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 + MVE_VPST 4, implicit $vpr + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr 
:: (load 8 from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + $lr = tMOVr $r3, 14, $noreg + renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg + renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg + renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.middle.block: + liveins: $q0, $q1, $r2 + + renamable $vpr = MVE_VCTP32 killed renamable $r2, 0, $noreg + renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr + renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +...