Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -73,6 +73,15 @@
 #define DEBUG_TYPE "arm-low-overhead-loops"
 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
 
+static bool isVectorPredicated(MachineInstr *MI) {
+  int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+  return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
+}
+
+static bool isVectorPredicate(MachineInstr *MI) {
+  return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
+}
+
 namespace {
 
   using InstSet = SmallPtrSetImpl<MachineInstr *>;
@@ -148,56 +157,127 @@
   // the block are predicated upon the vpr and we allow instructions to define
   // the vpr within in the block too.
   class VPTBlock {
-    // The predicate then instruction, which is either a VPT, or a VPST
-    // instruction.
-    std::unique_ptr<PredicatedMI> PredicateThen;
-    PredicatedMI *Divergent = nullptr;
-    SmallVector<PredicatedMI, 4> Insts;
+    friend struct LowOverheadLoop;
 
-  public:
-    VPTBlock(MachineInstr *MI, SetVector<MachineInstr *> &Preds) {
-      PredicateThen = std::make_unique<PredicatedMI>(MI, Preds);
+    SmallVector<MachineInstr *, 4> Insts;
+
+    static SmallVector<VPTBlock, 4> Blocks;
+    static SetVector<MachineInstr *> CurrentPredicate;
+    static std::map<MachineInstr *,
+      std::unique_ptr<PredicatedMI>> PredicatedInsts;
+
+    static void Create(MachineInstr *MI) {
+      assert(CurrentPredicate.size() && "Can't begin VPT without predicate");
+      Blocks.emplace_back(MI);
+      PredicatedInsts.emplace(
+          MI, std::make_unique<PredicatedMI>(MI, CurrentPredicate));
     }
 
-    void addInst(MachineInstr *MI, SetVector<MachineInstr *> &Preds) {
-      if (!Divergent && !set_difference(Preds, PredicateThen->Predicates).empty()) {
-        Divergent = &Insts.back();
-        LLVM_DEBUG(dbgs() << "  - has divergent predicate: " << *Divergent->MI);
-      }
-      Insts.emplace_back(MI, Preds);
-      assert(Insts.size() <= 4 && "Too many instructions in VPT block!");
+    static void reset() {
+      Blocks.clear();
+      PredicatedInsts.clear();
+      CurrentPredicate.clear();
+    }
+
+    static void addInst(MachineInstr *MI) {
+      Blocks.back().insert(MI);
+      PredicatedInsts.emplace(
+          MI, std::make_unique<PredicatedMI>(MI, CurrentPredicate));
     }
 
+    static void addPredicate(MachineInstr *MI) {
+      LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);
+      CurrentPredicate.insert(MI);
+    }
+
+    static void resetPredicate(MachineInstr *MI) {
+      LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);
+      CurrentPredicate.clear();
+      CurrentPredicate.insert(MI);
+    }
+
+  public:
     // Have we found an instruction within the block which defines the vpr? If
     // so, not all the instructions in the block will have the same predicate.
-    bool HasNonUniformPredicate() const {
-      return Divergent != nullptr;
+    static bool hasUniformPredicate(VPTBlock &Block) {
+      return getDivergent(Block) == nullptr;
     }
 
-    // Is the given instruction part of the predicate set controlling the entry
-    // to the block.
-    bool IsPredicatedOn(MachineInstr *MI) const {
-      return PredicateThen->Predicates.count(MI);
+    // If it exists, return the first internal instruction which modifies the
+    // VPR.
+    static MachineInstr *getDivergent(VPTBlock &Block) {
+      SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+      for (unsigned i = 1; i < Insts.size(); ++i) {
+        MachineInstr *Next = Insts[i];
+        if (isVectorPredicate(Next))
+          return Next; // Found an instruction altering the vpr.
+      }
+      return nullptr;
     }
 
-    // Returns true if this is a VPT instruction.
-    bool isVPT() const { return !isVPST(); }
+    // Return whether the given instruction is predicated upon a VCTP.
+    static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {
+      SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
+      if (Exclusive && Predicates.size() != 1)
+        return false;
+      for (auto *PredMI : Predicates)
+        if (isVCTP(PredMI))
+          return true;
+      return false;
+    }
 
-    // Returns true if this is a VPST instruction.
-    bool isVPST() const {
-      return PredicateThen->MI->getOpcode() == ARM::MVE_VPST;
+    // Is the VPST, controlling the block entry, predicated upon a VCTP.
+    static bool isEntryPredicatedOnVCTP(VPTBlock &Block,
+                                        bool Exclusive = false) {
+      SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+      return isPredicatedOnVCTP(Insts.front(), Exclusive);
     }
 
-    // Is the given instruction the only predicate which controls the entry to
-    // the block.
-    bool IsOnlyPredicatedOn(MachineInstr *MI) const {
-      return IsPredicatedOn(MI) && PredicateThen->Predicates.size() == 1;
+    static bool isValid() {
+      // All predication within the loop should be based on vctp. If the block
+      // isn't predicated on entry, check whether the vctp is within the block
+      // and that all other instructions are then predicated on it.
+      for (auto &Block : Blocks) {
+        if (isEntryPredicatedOnVCTP(Block))
+          continue;
+
+        SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+        for (auto *MI : Insts) {
+          // Check that any internal VCTPs are 'Then' predicated.
+          if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then)
+            return false;
+          // Skip other instructions that build up the predicate.
+          if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI))
+            continue;
+          // Check that any other instructions are predicated upon a vctp.
+          // TODO: We could infer when VPTs are implicitly predicated on the
+          // vctp (when the operands are predicated).
+          if (!isPredicatedOnVCTP(MI)) {
+            LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI);
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    VPTBlock(MachineInstr *MI) { Insts.push_back(MI); }
+
+    void insert(MachineInstr *MI) {
+      Insts.push_back(MI);
+      // VPT/VPST + 4 predicated instructions.
+      assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
+    }
+
+    bool containsVCTP() const {
+      for (auto *MI : Insts)
+        if (isVCTP(MI))
+          return true;
+      return false;
     }
 
     unsigned size() const { return Insts.size(); }
-    SmallVectorImpl<PredicatedMI> &getInsts() { return Insts; }
-    MachineInstr *getPredicateThen() const { return PredicateThen->MI; }
-    PredicatedMI *getDivergent() const { return Divergent; }
+    SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }
   };
 
   struct LowOverheadLoop {
@@ -216,9 +296,6 @@
     MachineInstr *VCTP = nullptr;
     MachineOperand TPNumElements;
     SmallPtrSet<MachineInstr *, 4> SecondaryVCTPs;
-    VPTBlock *CurrentBlock = nullptr;
-    SetVector<MachineInstr *> CurrentPredicate;
-    SmallVector<VPTBlock, 4> VPTBlocks;
     SmallPtrSet<MachineInstr *, 4> ToRemove;
     SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;
     bool Revert = false;
@@ -234,6 +311,7 @@
         Preheader = MBB;
       else if (auto *MBB = MLI.findLoopPreheader(&ML, true))
         Preheader = MBB;
+      VPTBlock::reset();
     }
 
     // If this is an MVE instruction, check that we know how to use tail
@@ -274,7 +352,9 @@
       return Start && Dec && End;
     }
 
-    SmallVectorImpl<VPTBlock> &getVPTBlocks() { return VPTBlocks; }
+    SmallVectorImpl<VPTBlock> &getVPTBlocks() {
+      return VPTBlock::Blocks;
+    }
 
     // Return the operand for the loop start instruction. This will be the loop
     // iteration count, or the number of elements if we're tail predicating.
@@ -358,6 +438,11 @@
 
 char ARMLowOverheadLoops::ID = 0;
 
+SmallVector<VPTBlock, 4> VPTBlock::Blocks;
+SetVector<MachineInstr *> VPTBlock::CurrentPredicate;
+std::map<MachineInstr *,
+         std::unique_ptr<PredicatedMI>> VPTBlock::PredicatedInsts;
+
 INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
                 false, false)
 
@@ -396,37 +481,9 @@
 
 bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
   assert(VCTP && "VCTP instruction expected but is not set");
-  // All predication within the loop should be based on vctp. If the block
-  // isn't predicated on entry, check whether the vctp is within the block
-  // and that all other instructions are then predicated on it.
-  for (auto &Block : VPTBlocks) {
-    if (Block.IsPredicatedOn(VCTP))
-      continue;
-    if (Block.HasNonUniformPredicate() && !isVCTP(Block.getDivergent()->MI)) {
-      LLVM_DEBUG(dbgs() << "ARM Loops: Found unsupported diverging predicate: "
-                        << *Block.getDivergent()->MI);
-      return false;
-    }
-    SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
-    for (auto &PredMI : Insts) {
-      // Check the instructions in the block and only allow:
-      //   - VCTPs
-      //   - Instructions predicated on the main VCTP
-      //   - Any VCMP
-      //      - VCMPs just "and" their result with VPR.P0. Whether they are
-      //        located before/after the VCTP is irrelevant - the end result will
-      //        be the same in both cases, so there's no point in requiring them
-      //        to be located after the VCTP!
-      if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI) ||
-          VCMPOpcodeToVPT(PredMI.MI->getOpcode()) != 0)
-        continue;
-      LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI
-                 << " - which is predicated on:\n";
-                 for (auto *MI : PredMI.Predicates)
-                   dbgs() << "  - " << *MI);
-      return false;
-    }
-  }
+
+  if (!VPTBlock::isValid())
+    return false;
 
   if (!ValidateLiveOuts()) {
     LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");
@@ -562,11 +619,6 @@
   return true;
 }
 
-static bool isVectorPredicated(MachineInstr *MI) {
-  int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
-  return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
-}
-
 static bool isRegInClass(const MachineOperand &MO,
                          const TargetRegisterClass *Class) {
   return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());
 }
@@ -906,16 +958,9 @@
     if (MI->getOpcode() != ARM::MVE_VPST) {
      assert(MI->findRegisterDefOperandIdx(ARM::VPR) != -1 &&
             "VPT does not implicitly define VPR?!");
-      CurrentPredicate.clear();
-      CurrentPredicate.insert(MI);
+      VPTBlock::resetPredicate(MI);
    }
 
-    assert(CurrentPredicate.size() && "Can't begin VPT without predicate");
-    VPTBlocks.emplace_back(MI, CurrentPredicate);
-    CurrentBlock = &VPTBlocks.back();
-    LLVM_DEBUG(dbgs() << "ARM Loops: Created new VPT block with predicate:\n";
-               for (auto *PI : CurrentPredicate)
-                 dbgs() << "  - " << *PI;
-               dbgs() << "  - at: " << *MI);
+    VPTBlock::Create(MI);
     return true;
   }
@@ -931,7 +976,7 @@
        continue;
 
      if (ARM::isVpred(MCID.OpInfo[i].OperandType)) {
-        CurrentBlock->addInst(MI, CurrentPredicate);
+        VPTBlock::addInst(MI);
        IsUse = true;
      } else {
        LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
@@ -975,9 +1020,9 @@
  if (IsDef) {
    // Clear the existing predicate when we're not in VPT Active state.
    if (!isVectorPredicated(MI))
-      CurrentPredicate.clear();
-    CurrentPredicate.insert(MI);
-    LLVM_DEBUG(dbgs() << "ARM Loops: Adding Predicate: " << *MI);
+      VPTBlock::resetPredicate(MI);
+    else
+      VPTBlock::addPredicate(MI);
  }
  return true;
 }
@@ -1312,23 +1357,20 @@
    llvm_unreachable("trying to unpredicate a non-predicated instruction");
  };
 
-  // There are a few scenarios which we have to fix up:
-  // 1. VPT Blocks with non-uniform predicates:
-  //    - a. When the divergent instruction is a vctp
-  //    - b. When the block uses a vpst, and is only predicated on the vctp
-  //    - c. When the block uses a vpt and (optionally) contains one or more
-  //         vctp.
-  // 2. VPT Blocks with uniform predicates:
-  //    - a. The block uses a vpst, and is only predicated on the vctp
  for (auto &Block : LoLoop.getVPTBlocks()) {
-    SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
-    if (Block.HasNonUniformPredicate()) {
-      PredicatedMI *Divergent = Block.getDivergent();
-      if (isVCTP(Divergent->MI)) {
-        // The vctp will be removed, so the block mask of the vp(s)t will need
-        // to be recomputed.
-        LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen());
-      } else if (Block.isVPST() && Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
+    SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+
+    if (VPTBlock::isEntryPredicatedOnVCTP(Block, /*exclusive*/true)) {
+      if (VPTBlock::hasUniformPredicate(Block)) {
+        // A vpt block starting with VPST, is only predicated upon vctp and has no
+        // internal vpr defs:
+        // - Remove vpst.
+        // - Unpredicate the remaining instructions.
+        LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Insts.front());
+        LoLoop.ToRemove.insert(Insts.front());
+        for (unsigned i = 1; i < Insts.size(); ++i)
+          RemovePredicate(Insts[i]);
+      } else {
        // The VPT block has a non-uniform predicate but it uses a vpst and its
        // entry is guarded only by a vctp, which means we:
        // - Need to remove the original vpst.
        // - Then need to unpredicate any following instructions, until
@@ -1338,13 +1380,14 @@
        //   the divergent vpr def.
        // TODO: We could be producing more VPT blocks than necessary and could
        // fold the newly created one into a proceeding one.
-        for (auto I = ++MachineBasicBlock::iterator(Block.getPredicateThen()),
-             E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I)
+        MachineInstr *Divergent = VPTBlock::getDivergent(Block);
+        for (auto I = ++MachineBasicBlock::iterator(Insts.front()),
+             E = ++MachineBasicBlock::iterator(Divergent); I != E; ++I)
          RemovePredicate(&*I);
 
        unsigned Size = 0;
-        auto E = MachineBasicBlock::reverse_iterator(Divergent->MI);
-        auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI);
+        auto E = MachineBasicBlock::reverse_iterator(Divergent);
+        auto I = MachineBasicBlock::reverse_iterator(Insts.back());
        MachineInstr *InsertAt = nullptr;
        while (I != E) {
          InsertAt = &*I;
@@ -1356,53 +1399,20 @@
                                          InsertAt->getDebugLoc(),
                                          TII->get(ARM::MVE_VPST));
        MIB.addImm(0);
-        LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
+        LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Insts.front());
        LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
-        LoLoop.ToRemove.insert(Block.getPredicateThen());
+        LoLoop.ToRemove.insert(Insts.front());
        LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
      }
-      // Else, if the block uses a vpt, iterate over the block, removing the
-      // extra VCTPs it may contain.
-      else if (Block.isVPT()) {
-        bool RemovedVCTP = false;
-        for (PredicatedMI &Elt : Block.getInsts()) {
-          MachineInstr *MI = Elt.MI;
-          if (isVCTP(MI)) {
-            LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *MI);
-            LoLoop.ToRemove.insert(MI);
-            RemovedVCTP = true;
-            continue;
-          }
-        }
-        if (RemovedVCTP)
-          LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen());
-      }
-    } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP) && Block.isVPST()) {
-      // A vpt block starting with VPST, is only predicated upon vctp and has no
-      // internal vpr defs:
-      // - Remove vpst.
-      // - Unpredicate the remaining instructions.
-      LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
-      LoLoop.ToRemove.insert(Block.getPredicateThen());
-      for (auto &PredMI : Insts)
-        RemovePredicate(PredMI.MI);
+    } else if (Block.containsVCTP()) {
+      // The vctp will be removed, so the block mask of the vp(s)t will need
+      // to be recomputed.
+      LoLoop.BlockMasksToRecompute.insert(Insts.front());
    }
  }
-  LLVM_DEBUG(dbgs() << "ARM Loops: Removing remaining VCTPs...\n");
-  // Remove the "main" VCTP
+
  LoLoop.ToRemove.insert(LoLoop.VCTP);
-  LLVM_DEBUG(dbgs() << "  " << *LoLoop.VCTP);
-  // Remove remaining secondary VCTPs
-  for (MachineInstr *VCTP : LoLoop.SecondaryVCTPs) {
-    // All VCTPs that aren't marked for removal yet should be unpredicated ones.
-    // The predicated ones should have already been marked for removal when
-    // visiting the VPT blocks.
-    if (LoLoop.ToRemove.insert(VCTP).second) {
-      assert(getVPTInstrPredicate(*VCTP) == ARMVCC::None &&
-             "Removing Predicated VCTP without updating the block mask!");
-      LLVM_DEBUG(dbgs() << "  " << *VCTP);
-    }
-  }
+  LoLoop.ToRemove.insert(LoLoop.SecondaryVCTPs.begin(), LoLoop.SecondaryVCTPs.end());
 }
 
 void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -465,28 +465,19 @@
 ; CHECK-NEXT:    it eq
 ; CHECK-NEXT:    popeq {r7, pc}
 ; CHECK-NEXT:  .LBB5_1: @ %bb4
-; CHECK-NEXT:    add.w r12, r3, #3
-; CHECK-NEXT:    mov.w lr, #1
-; CHECK-NEXT:    bic r12, r12, #3
-; CHECK-NEXT:    sub.w r12, r12, #4
-; CHECK-NEXT:    add.w lr, lr, r12, lsr #2
 ; CHECK-NEXT:    mov.w r12, #0
-; CHECK-NEXT:    dls lr, lr
+; CHECK-NEXT:    dlstp.32 lr, r3
 ; CHECK-NEXT:  .LBB5_2: @ %bb12
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vctp.32 r3
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vldrwt.u32 q0, [r0]
-; CHECK-NEXT:    vpttt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    vptt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 le, q0, r2
-; CHECK-NEXT:    vctpt.32 r3
 ; CHECK-NEXT:    vldrwt.u32 q1, [r1], #16
 ; CHECK-NEXT:    add.w r12, r12, #4
-; CHECK-NEXT:    subs r3, #4
 ; CHECK-NEXT:    vmul.i32 q0, q1, q0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrwt.32 q0, [r0], #16
-; CHECK-NEXT:    le lr, .LBB5_2
+; CHECK-NEXT:    letp lr, .LBB5_2
 ; CHECK-NEXT:  @ %bb.3: @ %bb32
 ; CHECK-NEXT:    pop {r7, pc}
 bb:
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
@@ -5,26 +5,20 @@
 ; CHECK-LABEL: minmaxval4:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    mov.w lr, #3
 ; CHECK-NEXT:    vmov.i32 q0, #0x80000000
 ; CHECK-NEXT:    vmvn.i32 q1, #0x80000000
 ; CHECK-NEXT:    movs r2, #10
-; CHECK-NEXT:    dls lr, lr
+; CHECK-NEXT:    dlstp.32 lr, r2
 ; CHECK-NEXT:  .LBB0_1: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vctp.32 r2
+; CHECK-NEXT:    vldrw.u32 q2, [r0], #16
+; CHECK-NEXT:    vcmp.s32 gt, q2, q0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vldrwt.u32 q2, [r0], #16
-; CHECK-NEXT:    vctp.32 r2
-; CHECK-NEXT:    vpstt
-; CHECK-NEXT:    vcmpt.s32 gt, q2, q0
 ; CHECK-NEXT:    vmovt q0, q2
-; CHECK-NEXT:    vctp.32 r2
-; CHECK-NEXT:    subs r2, #4
-; CHECK-NEXT:    vpstt
-; CHECK-NEXT:    vcmpt.s32 gt, q1, q2
+; CHECK-NEXT:    vcmp.s32 gt, q1, q2
+; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vmovt q1, q2
-; CHECK-NEXT:    le lr, .LBB0_1
+; CHECK-NEXT:    letp lr, .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %middle.block
 ; CHECK-NEXT:    mvn r0, #-2147483648
 ; CHECK-NEXT:    vminv.s32 r0, q1
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir
@@ -118,32 +118,24 @@
   ; CHECK: bb.1.bb3:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $r12 = t2ADDri renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0)
   ; CHECK:   $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = t2DLS killed renamable $lr
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r2
   ; CHECK: bb.2.bb9:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3
+  ; CHECK:   liveins: $lr, $r0, $r1, $r3
   ; CHECK:   renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0)
-  ; CHECK:   MVE_VPST 4, implicit $vpr
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr
+  ; CHECK:   MVE_VPST 8, implicit $vpr
   ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4)
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   MVE_VPST 4, implicit $vpr
-  ; CHECK:   renamable $vpr = MVE_VCMPi32r renamable $q0, $zr, 1, 1, killed renamable $vpr
+  ; CHECK:   renamable $vpr = MVE_VCMPi32r renamable $q0, $zr, 1, 0, killed $noreg
+  ; CHECK:   MVE_VPST 8, implicit $vpr
   ; CHECK:   renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4)
   ; CHECK:   renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
   ; CHECK:   MVE_VPST 8, implicit $vpr
   ; CHECK:   MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4)
   ; CHECK:   $r0 = tMOVr $r3, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.bb27:
   ; CHECK:   $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg
   ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir
@@ -215,26 +215,17 @@
   ; CHECK: bb.1.vector.ph:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   liveins: $r0, $r1, $r2
-  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2DLS killed renamable $lr
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r1
   ; CHECK: bb.2.vector.body:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr
-  ; CHECK:   MVE_VPTv4s32r 2, renamable $q1, renamable $r2, 11, implicit-def $vpr
+  ; CHECK:   liveins: $lr, $q0, $r0, $r2, $r3
+  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg
+  ; CHECK:   MVE_VPTv4s32r 4, renamable $q1, renamable $r2, 11, implicit-def $vpr
   ; CHECK:   renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr
   ; CHECK:   renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr
-  ; CHECK:   renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.for.cond.cleanup:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -731,26 +722,17 @@
   ; CHECK: bb.1.vector.ph:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   liveins: $r0, $r1, $r2
-  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2DLS killed renamable $lr
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r1
   ; CHECK: bb.2.vector.body:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr
-  ; CHECK:   MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 11, implicit-def $vpr
+  ; CHECK:   liveins: $lr, $q0, $r0, $r2, $r3
+  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg
+  ; CHECK:   MVE_VPTv4s32r 4, renamable $q0, renamable $r2, 11, implicit-def $vpr
   ; CHECK:   renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr
   ; CHECK:   renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr
-  ; CHECK:   renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.for.cond.cleanup:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry: