Index: llvm/lib/Target/ARM/MVEVPTBlockPass.cpp =================================================================== --- llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -34,30 +34,30 @@ #define DEBUG_TYPE "arm-mve-vpt" namespace { - class MVEVPTBlock : public MachineFunctionPass { - public: - static char ID; - const Thumb2InstrInfo *TII; - const TargetRegisterInfo *TRI; +class MVEVPTBlock : public MachineFunctionPass { +public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; - MVEVPTBlock() : MachineFunctionPass(ID) {} + MVEVPTBlock() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &Fn) override; - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } - StringRef getPassName() const override { - return "MVE VPT block insertion pass"; - } + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } - private: - bool InsertVPTBlocks(MachineBasicBlock &MBB); - }; +private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); +}; - char MVEVPTBlock::ID = 0; +char MVEVPTBlock::ID = 0; } // end anonymous namespace @@ -94,24 +94,185 @@ return &*CmpMI; } +static ARM::PredBlockMask ExpandBlockMask(ARM::PredBlockMask BlockMask, + unsigned Count, + ARMVCC::VPTCodes Kind) { + using PredBlockMask = ARM::PredBlockMask; + assert(Count != 0 && Kind != ARMVCC::None); + assert(countTrailingZeros((unsigned)BlockMask) != 0 && + "Mask is already full"); + + auto Done = [&](PredBlockMask AddedThen, PredBlockMask AddedElse) { + PredBlockMask Mask = (Kind == ARMVCC::Then) ? AddedThen : AddedElse; + return (Count == 1) ? Mask : ExpandBlockMask(Mask, Count - 1, Kind); + }; + + switch (BlockMask) { + case PredBlockMask::T: + return Done(PredBlockMask::TT, PredBlockMask::TE); + case PredBlockMask::TT: + return Done(PredBlockMask::TTT, PredBlockMask::TTE); + case PredBlockMask::TE: + return Done(PredBlockMask::TET, PredBlockMask::TEE); + case PredBlockMask::TTT: + return Done(PredBlockMask::TTTT, PredBlockMask::TTTE); + case PredBlockMask::TTE: + return Done(PredBlockMask::TTET, PredBlockMask::TTEE); + case PredBlockMask::TET: + return Done(PredBlockMask::TETT, PredBlockMask::TETE); + case PredBlockMask::TEE: + return Done(PredBlockMask::TEET, PredBlockMask::TEEE); + default: + llvm_unreachable("Unknown Mask"); + } +} + +// Simply skips a block of predicated instructions. +// Returns true if this successfully skipped the whole block of predicated +// instructions. Returns false when it stopped early (due to MaxSkips), or if +// Iter didn't point to a predicated instruction. 
+static bool SkipPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
+                                 MachineBasicBlock::instr_iterator EndIter,
+                                 unsigned MaxSkips, unsigned &NumInstSkipped) {
+  ARMVCC::VPTCodes NextPred = ARMVCC::None;
+  unsigned PredReg;
+  NumInstSkipped = 0;
+
+  while (Iter != EndIter) {
+    NextPred = getVPTInstrPredicate(*Iter, PredReg);
+    assert(NextPred != ARMVCC::Else &&
+           "VPT block pass does not expect Else preds");
+    if (NextPred == ARMVCC::None || MaxSkips == 0)
+      break;
+    --MaxSkips;
+    ++Iter;
+    ++NumInstSkipped;
+  }
+
+  return NumInstSkipped != 0 && (NextPred == ARMVCC::None || Iter == EndIter);
+}
+
+// Returns true if at least one instruction in the range [Iter, End) defines
+// or kills VPR.
+static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
+                                        MachineBasicBlock::iterator End) {
+  for (; Iter != End; ++Iter)
+    if (Iter->definesRegister(ARM::VPR) ||
+        Iter->findRegisterUseOperandIdx(ARM::VPR, true) != -1)
+      return true;
+  return false;
+}
+
+// Given an iterator (Iter) that points at an instruction with a "Then"
+// predicate, tries to create the largest block of contiguous predicated
+// instructions possible, and returns the VPT block mask of that block.
+//
+// This will try to perform some minor optimizations in order to maximize the
+// size of the block.
+static ARM::PredBlockMask
+CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
+               MachineBasicBlock::instr_iterator EndIter,
+               SmallVectorImpl<MachineInstr *> &DeadInstructions) {
+  MachineBasicBlock::instr_iterator BlockBeg = Iter;
+  assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then);
+
+  LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
+
+  unsigned BlockSize;
+  SkipPredicatedInstrs(Iter, EndIter, 4, BlockSize);
+
+  LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
+                      std::next(BlockBeg);
+                  AddedInstIter != Iter; ++AddedInstIter) {
+    dbgs() << "  adding: ";
+    AddedInstIter->dump();
+  });
+
+  // Generate the initial BlockMask.
+  ARM::PredBlockMask BlockMask = getARMVPTBlockMask(BlockSize);
+
+  // Remove VPNOTs while there's still room in the block, so we can make the
+  // largest block possible.
+  ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then;
+  while (BlockSize < 4 && Iter != EndIter &&
+         Iter->getOpcode() == ARM::MVE_VPNOT) {
+
+    // Try to skip all of the predicated instructions after the VPNOT,
+    // stopping after (4 - BlockSize) instructions. If we can't skip them
+    // all, stop.
+    unsigned ElseInstCnt = 0;
+    MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
+    if (!SkipPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
+                              ElseInstCnt))
+      break;
+
+    // Check whether this VPNOT can be removed: it can only be removed if at
+    // least one of the predicated instructions that follow it kills or sets
+    // VPR.
+    if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
+      break;
+
+    LLVM_DEBUG(dbgs() << "  removing VPNOT: "; Iter->dump());
+
+    // Record the new size of the block.
+    BlockSize += ElseInstCnt;
+    assert(BlockSize <= 4 && "Block is too large!");
+
+    // Record the VPNOT so we can remove it later.
+    DeadInstructions.push_back(&*Iter);
+    ++Iter;
+
+    // Replace the "then" predicates with "else" predicates in the block
+    // until we find an instruction that defines VPR; after that, leave the
+    // remaining predicates as "then".
+    // Note that we are using "Iter" to iterate over the block so we can
+    // update it at the same time.
+    bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
+    for (; Iter != VPNOTBlockEndIter; ++Iter) {
+      // Find the operand that holds the instruction's predicate.
+      int OpIdx = findFirstVPTPredOperandIdx(*Iter);
+      assert(OpIdx != -1);
+
+      // Update the mask + change the predicate to an else if needed.
+      if (ChangeToElse) {
+        // Change the predicate and update the mask.
+        Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
+        BlockMask = ExpandBlockMask(BlockMask, 1, ARMVCC::Else);
+        // Go back to "then" predicates for the instructions that follow if
+        // this one defines VPR.
+        if (Iter->definesRegister(ARM::VPR))
+          ChangeToElse = false;
+      } else
+        BlockMask = ExpandBlockMask(BlockMask, 1, ARMVCC::Then);
+
+      LLVM_DEBUG(dbgs() << "  adding: "; Iter->dump());
+    }
+
+    if (CurrentPredicate == ARMVCC::Then)
+      CurrentPredicate = ARMVCC::Else;
+    else
+      CurrentPredicate = ARMVCC::Then;
+  }
+  return BlockMask;
+}
+
 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
   bool Modified = false;
   MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
   MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
 
+  SmallVector<MachineInstr *, 4> DeadInstructions;
+
   while (MBIter != EndIter) {
     MachineInstr *MI = &*MBIter;
     unsigned PredReg = 0;
-    DebugLoc dl = MI->getDebugLoc();
+    DebugLoc DL = MI->getDebugLoc();
     ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
 
     // The idea of the predicate is that None, Then and Else are for use when
     // handling assembly language: they correspond to the three possible
     // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
-    // from assembly source or disassembled from object code, you expect to see
-    // a mixture whenever there's a long VPT block. But in code generation, we
-    // hope we'll never generate an Else as input to this pass.
+    // from assembly source or disassembled from object code, you expect to
+    // see a mixture whenever there's a long VPT block. But in code
+    // generation, we hope we'll never generate an Else as input to this pass.
     assert(Pred != ARMVCC::Else &&
            "VPT block pass does not expect Else preds");
     if (Pred == ARMVCC::None) {
@@ -119,41 +280,24 @@
       continue;
     }
 
-    LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
-    int VPTInstCnt = 1;
-    ARMVCC::VPTCodes NextPred;
-
-    // Look at subsequent instructions, checking if they can be in the same VPT
-    // block.
- ++MBIter; - while (MBIter != EndIter && VPTInstCnt < 4) { - NextPred = getVPTInstrPredicate(*MBIter, PredReg); - assert(NextPred != ARMVCC::Else && - "VPT block pass does not expect Else preds"); - if (NextPred != Pred) - break; - LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); - ++VPTInstCnt; - ++MBIter; - }; - - unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt); + uint64_t BlockMask = + (uint64_t)CreateVPTBlock(MBIter, EndIter, DeadInstructions); - // Search back for a VCMP that can be folded to create a VPT, or else create - // a VPST directly + // Search back for a VCMP that can be folded to create a VPT, or else + // create a VPST directly MachineInstrBuilder MIBuilder; unsigned NewOpcode; - MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); - if (VCMP) { + LLVM_DEBUG(dbgs() << " final block mask: " << BlockMask << "\n"); + if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); - MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); + MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); MIBuilder.addImm(BlockMask); MIBuilder.add(VCMP->getOperand(1)); MIBuilder.add(VCMP->getOperand(2)); MIBuilder.add(VCMP->getOperand(3)); VCMP->eraseFromParent(); } else { - MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); + MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); MIBuilder.addImm(BlockMask); } @@ -162,6 +306,15 @@ Modified = true; } + + // Erase all dead instructions + for (MachineInstr *DeadMI : DeadInstructions) { + if (DeadMI->isInsideBundle()) + DeadMI->eraseFromBundle(); + else + DeadMI->eraseFromParent(); + } + return Modified; } Index: llvm/lib/Target/ARM/Thumb2InstrInfo.h =================================================================== --- llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -74,6 +74,10 @@ int findFirstVPTPredOperandIdx(const MachineInstr &MI); ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, unsigned &PredReg); +inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) { + unsigned PredReg; + return getVPTInstrPredicate(MI, PredReg); } +} // namespace llvm #endif Index: llvm/test/CodeGen/Thumb2/mve-pred-not.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-not.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-not.ll @@ -383,11 +383,9 @@ define arm_aapcs_vfpcc <4 x i32> @vpnot_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: vpnot_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpt.s32 lt, q0, zr +; CHECK-NEXT: vpte.s32 lt, q0, zr ; CHECK-NEXT: vcmpt.s32 gt, q1, zr -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vcmpt.i32 eq, q2, zr +; CHECK-NEXT: vcmpe.i32 eq, q2, zr ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: @@ -400,3 +398,73 @@ %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b ret <4 x i32> %s } + +declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: vpttet_v4i1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vpttet.s32 ge, q0, q2 +; CHECK-NEXT: vmaxt.s32 q3, q0, q1 +; CHECK-NEXT: vcmpt.s32 gt, q0, zr +; CHECK-NEXT: vcmpe.s32 gt, q1, zr +; CHECK-NEXT: vmovt q3, q2 +; CHECK-NEXT: vmov q0, q3 +; CHECK-NEXT: bx 
lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
+  %2 = icmp sgt <4 x i32> %x, zeroinitializer
+  %3 = and <4 x i1> %0, %2
+  %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %5 = icmp sgt <4 x i32> %y, zeroinitializer
+  %6 = and <4 x i1> %5, %4
+  %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
+  ret <4 x i32> %7
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttee_v4i1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q3, q2
+; CHECK-NEXT:    vpttee.s32 ge, q0, q2
+; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
+; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
+; CHECK-NEXT:    vmove q3, q2
+; CHECK-NEXT:    vmove q3, q2
+; CHECK-NEXT:    vmov q0, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
+  %2 = icmp sgt <4 x i32> %x, zeroinitializer
+  %3 = and <4 x i1> %0, %2
+  %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
+  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
+  ret <4 x i32> %6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttte_v4i1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q3, q2
+; CHECK-NEXT:    vpttte.s32 ge, q0, q2
+; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
+; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
+; CHECK-NEXT:    vmovt q3, q2
+; CHECK-NEXT:    vmove q3, q2
+; CHECK-NEXT:    vmov q0, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
+  %2 = icmp sgt <4 x i32> %x, zeroinitializer
+  %3 = and <4 x i1> %0, %2
+  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %1)
+  %5 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4)
+  ret <4 x i32> %6
+}
Index: llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll
+++ llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll
@@ -19,11 +19,9 @@
 ; CHECK-NEXT:  .LBB0_1: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    vpt.s32 ge, q1, r2
+; CHECK-NEXT:    vpte.s32 ge, q1, r2
 ; CHECK-NEXT:    vcmpt.s32 le, q1, r1
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vstrwt.32 q0, [r0], #16
+; CHECK-NEXT:    vstrwe.32 q0, [r0], #16
 ; CHECK-NEXT:    le lr, .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %for.cond.cleanup
 ; CHECK-NEXT:    pop {r7, pc}
@@ -77,11 +75,9 @@
 ; CHECK-NEXT:  .LBB1_1: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrh.u16 q1, [r0]
-; CHECK-NEXT:    vpt.s16 ge, q1, r2
+; CHECK-NEXT:    vpte.s16 ge, q1, r2
 ; CHECK-NEXT:    vcmpt.s16 le, q1, r1
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vstrht.16 q0, [r0], #16
+; CHECK-NEXT:    vstrhe.16 q0, [r0], #16
 ; CHECK-NEXT:    le lr, .LBB1_1
 ; CHECK-NEXT:  @ %bb.2: @ %for.cond.cleanup
 ; CHECK-NEXT:    pop {r7, pc}
@@ -135,11 +131,9 @@
 ; CHECK-NEXT:  .LBB2_1: @ %vector.body
 ; CHECK-NEXT:    
@ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vpt.s8 ge, q1, r2 +; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r0], #16 +; CHECK-NEXT: vstrbe.8 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB2_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -197,11 +191,9 @@ ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vpt.f32 le, q0, q3 +; CHECK-NEXT: vpte.f32 le, q0, q3 ; CHECK-NEXT: vcmpt.f32 le, q3, q1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q2, [r0], #16 +; CHECK-NEXT: vstrwe.32 q2, [r0], #16 ; CHECK-NEXT: le lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -259,11 +251,9 @@ ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q3, [r0] -; CHECK-NEXT: vpt.f16 le, q1, q3 +; CHECK-NEXT: vpte.f16 le, q1, q3 ; CHECK-NEXT: vcmpt.f16 le, q3, q0 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q2, [r0], #16 +; CHECK-NEXT: vstrhe.16 q2, [r0], #16 ; CHECK-NEXT: le lr, .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -322,11 +312,9 @@ ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.s32 ge, q1, r2 +; CHECK-NEXT: vpte.s32 ge, q1, r2 ; CHECK-NEXT: vcmpt.s32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB5_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -380,11 +368,9 @@ ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.s16 ge, q1, r2 +; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -438,11 +424,9 @@ ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vpt.s8 ge, q1, r2 +; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r0], #16 +; CHECK-NEXT: vstrbe.8 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -500,11 +484,9 @@ ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vpt.f32 le, q0, q3 +; CHECK-NEXT: vpte.f32 le, q0, q3 ; CHECK-NEXT: vcmpt.f32 le, q3, q1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q2, [r0], #16 +; CHECK-NEXT: vstrwe.32 q2, [r0], #16 ; CHECK-NEXT: le lr, .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -562,11 +544,9 @@ ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q3, [r0] -; CHECK-NEXT: vpt.f16 le, q1, q3 +; CHECK-NEXT: vpte.f16 le, q1, q3 ; CHECK-NEXT: vcmpt.f16 le, q3, q0 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q2, [r0], #16 +; CHECK-NEXT: vstrhe.16 q2, [r0], #16 ; CHECK-NEXT: le lr, 
.LBB9_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} Index: llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir +++ llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir @@ -68,14 +68,10 @@ ; CHECK: liveins: $q0, $q1, $q2, $r0 ; CHECK: $vpr = VMSR_P0 killed $r0, 14 /* CC::al */, $noreg ; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 - ; CHECK: BUNDLE implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 { - ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $vpr, implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit killed $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 { + ; CHECK: MVE_VPST 12, implicit $vpr ; CHECK: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit $vpr, implicit killed $q3, implicit undef $q1 { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, renamable $q3, 1, renamable $vpr, undef renamable $q1 + ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q3, internal renamable $q3, 2, internal renamable $vpr, undef renamable $q1 ; CHECK: } ; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit killed $vpr, implicit killed $q1, implicit killed $q2, implicit killed $q3, implicit killed $q0 { Index: llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir @@ -0,0 +1,229 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @vpt_block_else(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2 + %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2 + 
%3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %3 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... +--- +name: vpt_block_else +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$q3', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: vpt_block_else + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal 
renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR 
renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 6, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def $vpr, implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 11, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: $q0 = MVE_VORR internal killed $q3, internal killed $q3, 2, $noreg, undef $q0 + ; CHECK: } + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3 + $q0 = MVE_VORR $q3, $q3, 0, 
$noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3 + $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, 
killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + $q0 = MVE_VORR killed $q3, killed $q3, 1, $noreg, undef $q0 + + tBX_RET 14, $noreg, implicit $q0 + +... Index: llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir +++ llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir @@ -61,14 +61,10 @@ ; CHECK-LABEL: name: vpnot ; CHECK: liveins: $q0, $q1, $q2 - ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1 { - ; CHECK: MVE_VPTv4s32r 8, renamable $q0, $zr, 11, implicit-def $vpr + ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1, implicit killed $q2 { + ; CHECK: MVE_VPTv4s32r 12, renamable $q0, $zr, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, internal killed renamable $vpr - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr ; CHECK: } ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 @@ -244,14 +240,10 @@ ; CHECK: liveins: $q0, $q1, $q2 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q0, $zr, 11, 0, $noreg ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr { - ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr, implicit killed $q2 { + ; CHECK: MVE_VPST 12, implicit $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed 
renamable $vpr ; CHECK: } ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr
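
Note (illustration only, not part of the patch): the mask bookkeeping done by ExpandBlockMask and CreateVPTBlock above is easy to sanity-check with a small standalone model. The sketch below is hypothetical C++ written for this review, not LLVM code: it models the block mask as a plain string of 'T'/'E' characters rather than the real ARM::PredBlockMask encoding, and every name in it is invented for the example.

// vpt_mask_sketch.cpp - hypothetical standalone model, NOT LLVM code.
// Models the ExpandBlockMask bookkeeping with a plain string of 'T'/'E'
// characters instead of the ARM::PredBlockMask enum used by the pass.
#include <cassert>
#include <iostream>
#include <string>

enum class VPTCode { Then, Else };

// Append Count copies of Kind to the mask, mirroring repeated
// ExpandBlockMask(Mask, Count, Kind) calls. An MVE VPT block predicates
// at most four instructions, hence the assertion.
std::string expandBlockMask(std::string Mask, unsigned Count, VPTCode Kind) {
  assert(Count != 0 && Mask.size() + Count <= 4 && "Block is too large!");
  while (Count--)
    Mask += (Kind == VPTCode::Then) ? 'T' : 'E';
  return Mask;
}

int main() {
  // Reproduce the shape of the @vpttet_v4i1 test: two "then" instructions,
  // a folded VPNOT flips the third to "else", and because that instruction
  // (a VCMP) redefines VPR, the fourth goes back to "then".
  std::string Mask = "T";                         // first predicated instr
  Mask = expandBlockMask(Mask, 1, VPTCode::Then); // second "then" instr
  Mask = expandBlockMask(Mask, 1, VPTCode::Else); // instr after the VPNOT
  Mask = expandBlockMask(Mask, 1, VPTCode::Then); // VPR was redefined
  std::cout << "vp" << Mask << "\n";              // prints "vpTTET"
  return 0;
}

Compiled and run, this prints "vpTTET", i.e. the vpttet shape expected by the new @vpttet_v4i1 test: the removed VPNOT flips the instruction after it to an "else" predicate, and since that instruction redefines VPR, the following one returns to "then".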