Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -123,27 +123,31 @@ SetVector Predicates; public: - PredicatedMI(MachineInstr *I, SetVector &Preds) : - MI(I) { Predicates.insert(Preds.begin(), Preds.end()); } + PredicatedMI(MachineInstr *I, SetVector &Preds) : MI(I) { + assert(I && "Instruction must not be null!"); + Predicates.insert(Preds.begin(), Preds.end()); + } }; - // Represent a VPT block, a list of instructions that begins with a VPST and - // has a maximum of four proceeding instructions. All instructions within the - // block are predicated upon the vpr and we allow instructions to define the - // vpr within in the block too. + // Represent a VPT block, a list of instructions that begins with a VPT/VPST + // and has a maximum of four proceeding instructions. All instructions within + // the block are predicated upon the vpr and we allow instructions to define + // the vpr within in the block too. class VPTBlock { - std::unique_ptr VPST; + // The predicate then instruction, which is either a VPT, or a VPST + // instruction. + std::unique_ptr PredicateThen; PredicatedMI *Divergent = nullptr; SmallVector Insts; public: VPTBlock(MachineInstr *MI, SetVector &Preds) { - VPST = std::make_unique(MI, Preds); + PredicateThen = std::make_unique(MI, Preds); } void addInst(MachineInstr *MI, SetVector &Preds) { LLVM_DEBUG(dbgs() << "ARM Loops: Adding predicated MI: " << *MI); - if (!Divergent && !set_difference(Preds, VPST->Predicates).empty()) { + if (!Divergent && !set_difference(Preds, PredicateThen->Predicates).empty()) { Divergent = &Insts.back(); LLVM_DEBUG(dbgs() << " - has divergent predicate: " << *Divergent->MI); } @@ -160,18 +164,26 @@ // Is the given instruction part of the predicate set controlling the entry // to the block. bool IsPredicatedOn(MachineInstr *MI) const { - return VPST->Predicates.count(MI); + return PredicateThen->Predicates.count(MI); + } + + // Returns true if this is a VPT instruction. + bool isVPT() const { return !isVPST(); } + + // Returns true if this is a VPST instruction. + bool isVPST() const { + return PredicateThen->MI->getOpcode() == ARM::MVE_VPST; } // Is the given instruction the only predicate which controls the entry to // the block. bool IsOnlyPredicatedOn(MachineInstr *MI) const { - return IsPredicatedOn(MI) && VPST->Predicates.size() == 1; + return IsPredicatedOn(MI) && PredicateThen->Predicates.size() == 1; } unsigned size() const { return Insts.size(); } SmallVectorImpl &getInsts() { return Insts; } - MachineInstr *getVPST() const { return VPST->MI; } + MachineInstr *getPredicateThen() const { return PredicateThen->MI; } PredicatedMI *getDivergent() const { return Divergent; } }; @@ -187,6 +199,7 @@ MachineInstr *Dec = nullptr; MachineInstr *End = nullptr; MachineInstr *VCTP = nullptr; + SmallPtrSet SecondaryVCTPs; VPTBlock *CurrentBlock = nullptr; SetVector CurrentPredicate; SmallVector VPTBlocks; @@ -206,6 +219,56 @@ // instruction is valid for tail predication. bool ValidateMVEInst(MachineInstr *MI); + // Finds all of the definition of R that reach MI and stores them in Insts. + void GetReachingDefs(MachineInstr *MI, Register R, + SmallPtrSetImpl &Defs) { + if (MachineInstr *UniqueDef = RDA.getUniqueReachingMIDef(MI, R)) { + Defs.insert(UniqueDef); + return; + } + + // If there isn't a single reaching definition, use getLiveOuts on the + // predecessors. + MachineBasicBlock *MIBB = MI->getParent(); + SmallPtrSet VisitedBBs; + for (MachineBasicBlock *Pred : MIBB->predecessors()) + RDA.getLiveOuts(Pred, R, Defs, VisitedBBs); + } + + // Returns true if this VPT's operands are either loop invariants, or + // defined by predicated instructions in the loop. + bool IsAcceptableVPT(MachineInstr *MI) { + assert(isVPTOpcode(MI->getOpcode()) && + (MI->getOpcode() != ARM::MVE_VPST) && "Not a VPT!"); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + + if (!Op1.isReg() || !Op2.isReg()) + return false; + + auto IsAcceptable = [&](Register R) { + // Check that the register is either a loop invariant (= not defined in + // the loop) or that it's defined by a predicated instruction in the + // loop, or a mix of both (e.g. when the register has 2 defs, one in the + // header and one in the loop) + SmallPtrSet Defs; + GetReachingDefs(MI, R, Defs); + + for (MachineInstr *Def : Defs) { + // If Def isn't in the same basic block as MI, it's always fine, but + // if it's in the same BB, it must be a predicated instruction. + // FIXME: This assumes that the loop is a single basic block, but this + // is currently fine as this pass only accepts single-blocks loops. + if (Def->getParent() == MI->getParent()) + if (getVPTInstrPredicate(*Def) == ARMVCC::None) + return false; + } + return true; + }; + + return IsAcceptable(Op1.getReg()) && IsAcceptable(Op2.getReg()); + } + void AnalyseMVEInst(MachineInstr *MI) { CannotTailPredicate = !ValidateMVEInst(MI); } @@ -474,6 +537,8 @@ SmallPtrSet Ignore = { VCTP }; unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); + Ignore.insert(SecondaryVCTPs.begin(), SecondaryVCTPs.end()); + if (RDA.isSafeToRemove(Def, ElementChain, Ignore)) { bool FoundSub = false; @@ -622,7 +687,7 @@ if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) continue; - if (isVCTP(&MI) || MI.getOpcode() == ARM::MVE_VPST) + if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode())) continue; // Predicated loads will write zeros to the falsely predicated bytes of the @@ -768,29 +833,44 @@ if (CannotTailPredicate) return false; - // Only support a single vctp. - if (isVCTP(MI) && VCTP) - return false; + if (isVCTP(MI)) { + // If we find another VCTP, check whether it uses the same value as the main VCTP. + // If it does, store it in the SecondaryVCTPs set, else refuse it. + if (VCTP) { + if (!VCTP->getOperand(1).isIdenticalTo(MI->getOperand(1)) || + !RDA.hasSameReachingDef(VCTP, MI, MI->getOperand(1).getReg())) + return false; + LLVM_DEBUG(dbgs() << "ARM Loops: Found secondary VCTP: " << *MI); + SecondaryVCTPs.insert(MI); + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Found 'main' VCTP: " << *MI); + VCTP = MI; + } + } else if (isVPTOpcode(MI->getOpcode())) { + // We do not need to do anything special for VPSTs, but VPTs are tricky - we + // need to check that at least one of their operands is defined by a + // predicated instruction in a previous VPT block. + if (MI->getOpcode() != ARM::MVE_VPST && !IsAcceptableVPT(MI)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Rejecting VPT: " + << *MI); + return false; + } - // Start a new vpt block when we discover a vpt. - if (MI->getOpcode() == ARM::MVE_VPST) { VPTBlocks.emplace_back(MI, CurrentPredicate); CurrentBlock = &VPTBlocks.back(); return true; - } else if (isVCTP(MI)) - VCTP = MI; - else if (MI->getOpcode() == ARM::MVE_VPSEL || - MI->getOpcode() == ARM::MVE_VPNOT) + } else if (MI->getOpcode() == ARM::MVE_VPSEL || + MI->getOpcode() == ARM::MVE_VPNOT) { + // TODO: Allow VPSEL and VPNOT, we currently cannot because: + // 1) It will use the VPR as a predicate operand, but doesn't have to be + // instead a VPT block, which means we can assert while building up + // the VPT block because we don't find another VPT or VPST to being a new + // one. + // 2) VPSEL still requires a VPR operand even after tail predicating, + // which means we can't remove it unless there is another + // instruction, such as vcmp, that can provide the VPR def. return false; - - // TODO: Allow VPSEL and VPNOT, we currently cannot because: - // 1) It will use the VPR as a predicate operand, but doesn't have to be - // instead a VPT block, which means we can assert while building up - // the VPT block because we don't find another VPST to being a new - // one. - // 2) VPSEL still requires a VPR operand even after tail predicating, - // which means we can't remove it unless there is another - // instruction, such as vcmp, that can provide the VPR def. + } bool IsUse = false; bool IsDef = false; @@ -1170,26 +1250,24 @@ }; // There are a few scenarios which we have to fix up: - // 1) A VPT block with is only predicated by the vctp and has no internal vpr - // defs. - // 2) A VPT block which is only predicated by the vctp but has an internal - // vpr def. - // 3) A VPT block which is predicated upon the vctp as well as another vpr - // def. - // 4) A VPT block which is not predicated upon a vctp, but contains it and - // all instructions within the block are predicated upon in. - + // 1. VPT Blocks with non-uniform predicates: + // - a. When the divergent instruction is a vctp + // - b. When the block uses a vpst, and is only predicated on the vctp + // - c. When the block uses a vpt and (optionally) contains one or more + // vctp. + // 2. VPT Blocks with uniform predicates: + // - a. The block uses a vpst, and is only predicated on the vctp for (auto &Block : LoLoop.getVPTBlocks()) { SmallVectorImpl &Insts = Block.getInsts(); if (Block.HasNonUniformPredicate()) { PredicatedMI *Divergent = Block.getDivergent(); if (isVCTP(Divergent->MI)) { - // The vctp will be removed, so the block mask of the VPST/VPT will need + // The vctp will be removed, so the block mask of the vp(s)t will need // to be recomputed. - LoLoop.BlockMasksToRecompute.insert(Block.getVPST()); - } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) { - // The VPT block has a non-uniform predicate but it's entry is guarded - // only by a vctp, which means we: + LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen()); + } else if (Block.isVPST() && Block.IsOnlyPredicatedOn(LoLoop.VCTP)) { + // The VPT block has a non-uniform predicate but it uses a vpst and its + // entry is guarded only by a vctp, which means we: // - Need to remove the original vpst. // - Then need to unpredicate any following instructions, until // we come across the divergent vpr def. @@ -1197,7 +1275,7 @@ // the divergent vpr def. // TODO: We could be producing more VPT blocks than necessary and could // fold the newly created one into a proceeding one. - for (auto I = ++MachineBasicBlock::iterator(Block.getVPST()), + for (auto I = ++MachineBasicBlock::iterator(Block.getPredicateThen()), E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I) RemovePredicate(&*I); @@ -1210,29 +1288,58 @@ ++Size; ++I; } - // Create a VPST with a null mask, we'll recompute it later. + // Create a VPST (with a null mask for now, we'll recompute it later). MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt, InsertAt->getDebugLoc(), TII->get(ARM::MVE_VPST)); MIB.addImm(0); - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST()); + LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen()); LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB); - LoLoop.ToRemove.insert(Block.getVPST()); + LoLoop.ToRemove.insert(Block.getPredicateThen()); LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); } - } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) { - // A vpt block which is only predicated upon vctp and has no internal vpr - // defs: + // Else, if the block uses a vpt, iterate over the block, removing the + // extra VCTPs it may contain. + else if (Block.isVPT()) { + bool RemovedVCTP = false; + for (PredicatedMI &Elt : Block.getInsts()) { + MachineInstr *MI = Elt.MI; + if (isVCTP(MI)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *MI); + LoLoop.ToRemove.insert(MI); + RemovedVCTP = true; + continue; + } + } + if (RemovedVCTP) + LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen()); + } + } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP) && Block.isVPST()) { + // A vpt block starting with VPST, is only predicated upon vctp and has no + // internal vpr defs: // - Remove vpst. // - Unpredicate the remaining instructions. - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST()); - LoLoop.ToRemove.insert(Block.getVPST()); + LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen()); + LoLoop.ToRemove.insert(Block.getPredicateThen()); for (auto &PredMI : Insts) RemovePredicate(PredMI.MI); } } - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP); + LLVM_DEBUG(dbgs() << "ARM Loops: Removing remaining VCTPs...\n"); + // Remove the "main" VCTP LoLoop.ToRemove.insert(LoLoop.VCTP); + LLVM_DEBUG(dbgs() << " " << *LoLoop.VCTP); + // Remove remaining secondary VCTPs + for (MachineInstr *VCTP : LoLoop.SecondaryVCTPs) { + // All VCTPs that aren't marked for removal yet should be unpredicated ones. + // The predicated ones should have already been marked for removal when + // visiting the VPT blocks. + if (LoLoop.ToRemove.insert(VCTP).second) { + assert(getVPTInstrPredicate(*VCTP) == ARMVCC::None && + "Removing Predicated VCTP without updating the block mask!"); + LLVM_DEBUG(dbgs() << " " << *VCTP); + } + } } void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) { Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -449,26 +449,17 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_1: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: vpttt.i32 ne, q0, zr +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vcmpt.s32 le, q0, r2 -; CHECK-NEXT: vctpt.32 r3 ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB5_1 +; CHECK-NEXT: letp lr, .LBB5_1 ; CHECK-NEXT: @ %bb.2: @ %bb32 ; CHECK-NEXT: pop {r7, pc} bb: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir @@ -118,24 +118,16 @@ ; CHECK: bb.1.bb3: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2ADDri renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 + ; CHECK: liveins: $lr, $r0, $r1, $r3 ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) - ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg - ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) ; CHECK: MVE_VPST 4, implicit $vpr ; CHECK: renamable $vpr = MVE_VCMPi32r renamable $q0, $zr, 1, 1, killed renamable $vpr ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) @@ -143,7 +135,7 @@ ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.bb27: ; CHECK: $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir @@ -0,0 +1,834 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + @_ZL3arr = internal global [10 x i32] [i32 1, i32 2, i32 3, i32 5, i32 5, i32 5, i32 -2, i32 0, i32 -8, i32 -1], align 4 + @.str = private unnamed_addr constant [5 x i8] c"%d, \00", align 1 + + define arm_aapcs_vfpcc void @vpt_block(i32* nocapture %A, i32 %n, i32 %x) { + entry: + %cmp9 = icmp sgt i32 %n, 0 + %0 = add i32 %n, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp9, label %vector.ph, label %for.cond.cleanup + + vector.ph: ; preds = %entry + %sub = sub nsw i32 0, %x + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %A, %vector.ph ] + %6 = phi i32 [ %5, %vector.ph ], [ %18, %vector.body ] + %7 = phi i32 [ %n, %vector.ph ], [ %9, %vector.body ] + %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>* + %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) + %9 = sub i32 %7, 4 + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv12, i32 4, <4 x i1> %8, <4 x i32> undef) + %10 = insertelement <4 x i32> undef, i32 %x, i32 0 + %11 = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> zeroinitializer + %12 = icmp slt <4 x i32> %wide.masked.load, %11 + %13 = insertelement <4 x i32> undef, i32 %sub, i32 0 + %14 = shufflevector <4 x i32> %13, <4 x i32> undef, <4 x i32> zeroinitializer + %15 = icmp sgt <4 x i32> %wide.masked.load, %14 + %16 = and <4 x i1> %12, %15 + %17 = and <4 x i1> %16, %8 + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> zeroinitializer, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %17) + %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4 + %18 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) + %19 = icmp ne i32 %18, 0 + br i1 %19, label %vector.body, label %for.cond.cleanup + + for.cond.cleanup: ; preds = %vector.body, %entry + ret void + } + + define arm_aapcs_vfpcc void @different_vcpt_reaching_def(i32* nocapture %A, i32 %n, i32 %x) { + ; Intentionally left blank - see MIR sequence below. + entry: + unreachable + vector.body: + unreachable + for.cond.cleanup: + unreachable + } + + define arm_aapcs_vfpcc void @different_vcpt_operand(i32* nocapture %A, i32 %n, i32 %x) { + ; Intentionally left blank - see MIR sequence below. + entry: + unreachable + vector.body: + unreachable + for.cond.cleanup: + unreachable + } + + define arm_aapcs_vfpcc void @else_vcpt(i32* nocapture %data, i32 %N, i32 %T) { + entry: + %cmp9 = icmp sgt i32 %N, 0 + %0 = add i32 %N, 3 + %1 = lshr i32 %0, 2 + %2 = shl nuw i32 %1, 2 + %3 = add i32 %2, -4 + %4 = lshr i32 %3, 2 + %5 = add nuw nsw i32 %4, 1 + br i1 %cmp9, label %vector.ph, label %for.cond.cleanup + + vector.ph: ; preds = %entry + %sub = sub nsw i32 0, %T + call void @llvm.set.loop.iterations.i32(i32 %5) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %data, %vector.ph ] + %6 = phi i32 [ %5, %vector.ph ], [ %18, %vector.body ] + %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] + %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>* + %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) + %9 = sub i32 %7, 4 + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv12, i32 4, <4 x i1> %8, <4 x i32> undef) + %10 = insertelement <4 x i32> undef, i32 %T, i32 0 + %11 = shufflevector <4 x i32> %10, <4 x i32> undef, <4 x i32> zeroinitializer + %12 = icmp slt <4 x i32> %wide.masked.load, %11 + %13 = insertelement <4 x i32> undef, i32 %sub, i32 0 + %14 = shufflevector <4 x i32> %13, <4 x i32> undef, <4 x i32> zeroinitializer + %15 = icmp sgt <4 x i32> %wide.masked.load, %14 + %16 = or <4 x i1> %12, %15 + %17 = and <4 x i1> %16, %8 + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> zeroinitializer, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %17) + %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4 + %18 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) + %19 = icmp ne i32 %18, 0 + br i1 %19, label %vector.body, label %for.cond.cleanup + + for.cond.cleanup: ; preds = %vector.body, %entry + ret void + } + + define arm_aapcs_vfpcc void @loop_invariant_vpt_operands(i32* nocapture %A, i32 %n, i32 %x) { + ; Intentionally left blank - see MIR sequence below. + entry: + unreachable + vector.body: + unreachable + for.cond.cleanup: + unreachable + } + + define arm_aapcs_vfpcc void @bad_vpt(i32* nocapture %A, i32 %n, i32 %x) { + ; Intentionally left blank - see MIR sequence below. + entry: + unreachable + vector.body: + unreachable + for.cond.cleanup: + unreachable + } + + declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare void @llvm.set.loop.iterations.i32(i32) + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) + declare <4 x i1> @llvm.arm.mve.vctp32(i32) +... +--- +name: vpt_block +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: vpt_block + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg + ; CHECK: MVE_VPTv4s32r 4, renamable $q1, renamable $r2, 11, implicit-def $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr + renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr + renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.for.cond.cleanup: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc +... +--- +name: different_vcpt_reaching_def +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: different_vcpt_reaching_def + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2DLS killed renamable $lr + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r1 = MVE_VSTRWU32_post renamable $q0, killed renamable $r1, 16, 1, renamable $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + ; CHECK: MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + ; + ; Tests that secondary VCTPs are refused when their operand's reaching definition is not the same as the main + ; VCTP's. + ; + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $r1 = MVE_VSTRWU32_post renamable $q0, killed renamable $r1, 16, 1, renamable $vpr + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr + renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr + renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.for.cond.cleanup: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc +... +--- +name: different_vcpt_operand +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: different_vcpt_operand + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2DLS killed renamable $lr + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + ; CHECK: MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + ; + ; Tests that secondary VCTPs are refused when their operand is not the same register as the main VCTP's. + ; + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr + renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr + renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.for.cond.cleanup: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc +... +--- +name: else_vcpt +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: else_vcpt + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg + ; CHECK: MVE_VPTv4s32r 12, renamable $q1, renamable $r2, 10, implicit-def $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 13, 1, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + ; + ; Test including a else-predicated VCTP. + ; + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + MVE_VPTv4s32r 14, renamable $q1, renamable $r2, 10, implicit-def $vpr + renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 13, 1, killed renamable $vpr + renamable $vpr = MVE_VCTP32 renamable $r1, 2, killed renamable $vpr + renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.for.cond.cleanup: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc +... +--- +name: loop_invariant_vpt_operands +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: loop_invariant_vpt_operands + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg + ; CHECK: MVE_VPTv4s32r 4, renamable $q0, renamable $r2, 11, implicit-def $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 11, implicit-def $vpr + renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr + renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.for.cond.cleanup: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc +... +--- +name: bad_vpt +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: bad_vpt + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2DLS killed renamable $lr + ; CHECK: bb.1.vector.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + ; CHECK: MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 11, implicit-def $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 0, $noreg + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + ; + ; This VPT has one operand that's defined by a non-predicated instruction, so it should + ; be refused. + ; + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + + bb.1.vector.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr + MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 11, implicit-def $vpr + renamable $vpr = MVE_VCMPs32r renamable $q1, renamable $r3, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr + renamable $r0 = MVE_VSTRWU32_post renamable $q0, renamable $r0, 16, 1, killed renamable $vpr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $r2 = MVE_VSTRWU32_post renamable $q1, renamable $r2, 16, 0, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.for.cond.cleanup: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc +...