diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -1222,6 +1222,7 @@ include "ARMScheduleR52.td" include "ARMScheduleA57.td" include "ARMScheduleM4.td" +include "ARMScheduleM55.td" include "ARMScheduleM7.td" //===----------------------------------------------------------------------===// @@ -1497,7 +1498,7 @@ FeatureHasNoBranchPredictor, FeatureFixCMSE_CVE_2021_35465]>; -def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline, +def : ProcessorModel<"cortex-m55", CortexM55Model, [ARMv81mMainline, FeatureDSP, FeatureFPARMv8_D16, FeatureUseMISched, diff --git a/llvm/lib/Target/ARM/ARMScheduleM55.td b/llvm/lib/Target/ARM/ARMScheduleM55.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMScheduleM55.td @@ -0,0 +1,478 @@ +//==- ARMScheduleM55.td - Arm Cortex-M55 Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for the Arm Cortex-M55 processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// Cortex-M55 is a lot like the M4/M33 in terms of scheduling. It technically +// has an extra pipeline stage but that is unimportant for scheduling, just +// starting our model a stage later. The main points of interest over a +// Cortex-M4 are MVE instructions and the ability to dual issue thumb1 +// instructions. +// +// +// MVE +// +// The EPU pipelines now include both MVE and FP instructions. It has four +// pipelines across 4 stages (E1-E4). 
These pipelines are "control", +// "load/store", "integer" and "float/mul". We start the schedule at E2 to line +// up with the rest of the pipeline we model, and take the latency as the time +// between reading registers (almost always in E2) and register write (or +// forward, if it allows it). This means that a lot of instructions (including +// loads) actually take 1 cycle (amazingly). +// +// Each MVE instruction needs to take 2 beats, each performing 64bits of the +// 128bit vector operation. So long as the beats are to different pipelines, +// the execution of the first-beat-of-the-second-instruction can overlap with +// the second-beat-of-the-first. For example a sequence of VLDR;VADD;VMUL;VSTR +// can look like this in a pipeline: +// 1 2 3 4 5 +// LD/ST : VLDR VLDR VSTR VSTR +// INTEGER: VADD VADD +// FP/MUL : VMUL VMUL +// +// But a sequence of VLDR;VLDRB;VADD;VSTR, because the loads cannot overlap, +// looks like: +// 1 2 3 4 5 6 +// LD/ST : VLDR VLDR VLDRB VLDRB VSTR VSTR +// INTEGER: VADD VADD +// +// For this schedule, we currently model latencies and pipelines well for each +// instruction. MVE instructions take two beats, modelled using +// ResourceCycles=[2]. +// +// +// Dual Issue +// +// Cortex-M55 can dual issue two 16-bit T1 instructions providing one is one of +// NOPs, ITs, Brs, ADDri/SUBri, UXTB/H, SXTB/H and MOVri's. NOPs and IT's are +// not relevant (they will not appear when scheduling), Brs are only at the end +// of the block. The others are more useful, and where the problems arise. +// +// The first problem comes from the fact that we will only be seeing Thumb2 +// instructions at the point in the pipeline where we do the scheduling. The +// Thumb2SizeReductionPass has not been run yet. Especially pre-ra scheduling +// (where the scheduler has the most freedom) we can only really guess at which +// instructions will become thumb1 instructions. We are quite optimistic, and +// may get some things wrong as a result. 
+// +// The other problem is one of telling llvm what to do exactly. The way we +// attempt to model this is: +// Set IssueWidth to 2 to allow 2 instructions per cycle. +// All instructions we cannot dual issue are "SingleIssue=1" (MVE/FP and T2 +// instructions) +// We guess at another set of instructions that will become T1 instructions. +// These become the primary instruction in a dual issue pair (the normal +// one). These use normal resources and latencies, but set SingleIssue = 0. +// We guess at another set of instructions that will be shrunk down into T1 DI +// instructions (add, sub, mov's, etc), which become the secondary. These +// don't use a resource, and set SingleIssue = 0. +// +// So our guessing is a bit rough. It may be possible to improve this by moving +// T2SizeReduction pass earlier in the pipeline, for example, so that at least +// Post-RA scheduling sees what is T1/T2. It may also be possible to write a +// custom instruction matcher to more accurately guess at T1 instructions. + + +def CortexM55Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since M55 is in-order. + let IssueWidth = 2; // There is some dual-issue support in M55. + let MispredictPenalty = 3; // Default is 10 + let LoadLatency = 4; // Default is 4 + let PostRAScheduler = 1; + let FullInstRWOverlapCheck = 1; + + let CompleteModel = 0; + let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasMatMulInt8, HasZCZ, + IsNotMClass, HasV8, HasV8_3a, HasTrustZone, HasDFB, + IsWindows]; +} + + +let SchedModel = CortexM55Model in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since +// M55 is in-order. 
+def M55UnitALU : ProcResource<1> { let BufferSize = 0; } // Int ALU +def M55UnitVecALU : ProcResource<1> { let BufferSize = 0; } // MVE integer pipe +def M55UnitVecFPALU : ProcResource<1> { let BufferSize = 0; } // MVE float pipe +def M55UnitLoadStore : ProcResource<1> { let BufferSize = 0; } // MVE load/store pipe +def M55UnitVecSys : ProcResource<1> { let BufferSize = 0; } // MVE control/sys pipe + +// Some VMOV's can go down either pipeline. FIXME: This M55Write2IntFPE2 is +// intended to model the VMOV taking either Int or FP for 2 cycles. It is not +// clear if the llvm scheduler is using it like we want though. +def M55UnitVecIntFP: ProcResGroup<[M55UnitVecALU, M55UnitVecFPALU]>; + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +//=====// +// ALU // +//=====// + +// Generic writes for Flags, GPRs and other extra operands (eg post-inc, vadc flags, vaddlv etc) +def M55WriteLat0 : SchedWriteRes<[]> { let Latency = 0; let NumMicroOps = 0; } +def M55WriteLat1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; } +def M55WriteLat2 : SchedWriteRes<[]> { let Latency = 2; let NumMicroOps = 0; } + +// DX instructions are ALU instructions that take a single cycle. The +// instructions that may be shrunk to T1 (and can be dual issued) are +// SingleIssue = 0. The others are SingleIssue = 1. 
+let SingleIssue = 0, Latency = 1 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def M55WriteDX_DI : SchedWriteRes<[M55UnitALU]>; +} +let SingleIssue = 1, Latency = 1 in { + def : WriteRes; + def M55WriteDX_SI : SchedWriteRes<[M55UnitALU]>; +} + +def : InstRW<[M55WriteDX_SI], (instregex "t2BF[CI]", "t2CPS", "t2DBG", + "t2MRS", "t2MSR", "t2SEL", "t2SG", "t2TT")>; +def : InstRW<[M55WriteDX_SI], (instregex "t2SUBS_PC_LR", "COPY")>; +def : InstRW<[M55WriteDX_SI], (instregex "t2CS(EL|INC|INV|NEG)")>; +// Thumb 2 instructions that could be reduced to a thumb 1 instruction and can +// be dual issued with one of the above. This list is optimistic. +def : InstRW<[M55WriteDX_DI], (instregex "t2ADDC?rr$", "t2ADDrr$", + "t2ADDSrr$", "t2ANDrr$", "t2ASRr[ir]$", "t2BICrr$", "t2CMNzrr$", + "t2CMPr[ir]$", "t2EORrr$", "t2LSLr[ir]$", "t2LSRr[ir]$", "t2MVNr$", + "t2ORRrr$", "t2REV(16|SH)?$", "t2RORrr$", "t2RSBr[ir]$", "t2RSBSri$", + "t2SBCrr$", "t2SUBS?rr$", "t2TEQrr$", "t2TSTrr$", "t2STRi12$", + "t2STRs$", "t2STRBi12$", "t2STRBs$", "t2STRHi12$", "t2STRHs$", + "t2STR_POST$", "t2STMIA$", "t2STMIA_UPD$", "t2STMDB$", "t2STMDB_UPD$")>; +def : InstRW<[M55WriteDX_DI], (instregex "t2SETPAN$", "tADC$", "tADDhirr$", + "tADDrSP$", "tADDrSPi$", "tADDrr$", "tADDspi$", "tADDspr$", "tADR$", + "tAND$", "tASRri$", "tASRrr$", "tBIC$", "tBKPT$", "tCBNZ$", "tCBZ$", + "tCMNz$", "tCMPhir$", "tCMPi8$", "tCMPr$", "tCPS$", "tEOR$", "tHINT$", + "tHLT$", "tLSLri$", "tLSLrr$", "tLSRri$", "tLSRrr$", "tMOVSr$", + "tMUL$", "tMVN$", "tORR$", "tPICADD$", "tPOP$", "tPUSH$", "tREV$", + "tREV16$", "tREVSH$", "tROR$", "tRSB$", "tSBC$", "tSETEND$", + "tSTMIA_UPD$", "tSTRBi$", "tSTRBr$", "tSTRHi$", "tSTRHr$", "tSTRi$", + "tSTRr$", "tSTRspi$", "tSUBrr$", "tSUBspi$", "tSVC$", "tTRAP$", + "tTST$", "tUDF$")>; +def : InstRW<[M55WriteDX_DI], (instregex "tB$", "tBLXNSr$", "tBLXr$", "tBX$", + "tBXNS$", "tBcc$")>; + + +// CX instructions take 2 (or more) cycles. 
Again T1 instructions may be dual +// issued (SingleIssue = 0) +let SingleIssue = 0, Latency = 2 in { + def : WriteRes; + def M55WriteCX_DI : SchedWriteRes<[M55UnitALU]>; +} +let SingleIssue = 1, Latency = 2 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def M55WriteCX_SI : SchedWriteRes<[M55UnitALU]>; +} + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : WriteRes { let Latency = 2; } +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : WriteRes { let Latency = 2; } + +def : InstRW<[M55WriteCX_SI], (instregex "t2CDP", "t2CLREX", "t2[DI][MS]B", + "t2MCR", "t2MOVSs[ir]", "t2MRC", "t2MUL", "t2STC")>; +def : InstRW<[M55WriteCX_SI], (instregex "t2Q", "t2[SU](ADD|ASX|BFX|DIV)", + "t2[SU]H(ADD|ASX|SUB|SAX)", "t2SM[LM]", "t2S(SAT|SUB|SAX)", "t2UQ", + "t2USA", "t2USUB", "t2UXTA[BH]")>; +def : InstRW<[M55WriteCX_SI], (instregex "t2LD[AC]", "t2STL", "t2STRD")>; +def : InstRW<[M55WriteCX_SI], (instregex "MVE_[SU]Q?R?SH[LR]$")>; +def : InstRW<[M55WriteCX_SI, M55WriteLat2], (instregex "MVE_ASRL", "MVE_LSLL", + "MVE_LSRL", "MVE_[SU]Q?R?SH[LR]L")>; +// This may be higher in practice, but that likely doesn't make a difference +// for scheduling +def : InstRW<[M55WriteCX_SI], (instregex "t2CLRM")>; + +def : InstRW<[M55WriteCX_DI], (instregex "t2LDR[BH]?i12$", "t2LDRS?[BH]?s$", + "t2LDM")>; +def : InstRW<[M55WriteCX_DI], (instregex "tLDM", "tLDRBi$", "tLDRBr$", + "tLDRHi$", "tLDRHr$", "tLDRSB$", "tLDRSH$", "tLDRi$", "tLDRpci$", + "tLDRr$", "tLDRspi$")>; + +// Dual Issue instructions +let Latency = 1, SingleIssue = 0 in { + def : WriteRes; + def M55WriteDI : SchedWriteRes<[]>; +} + +def : InstRW<[M55WriteDI], (instregex "tADDi[38]$", "tSUBi[38]$", "tMOVi8$", + "tMOVr$", "tUXT[BH]$", "tSXT[BH]$")>; +// Thumb 2 instructions that could be reduced to a dual issuable Thumb 1 +// instruction above. 
+def : InstRW<[M55WriteDI], (instregex "t2ADDS?ri$", "t2MOV[ir]$", "t2MOVi16$", + "t2MOVr$", "t2SUBS?ri$", "t2[US]XT[BH]$")>; +def : InstRW<[M55WriteDI], (instregex "t2IT", "IT")>; + + +def : InstRW<[M55WriteLat0], (instregex "t2LoopDec")>; + +// Forwarding + +// No forwarding in the ALU normally +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//=============// +// MVE and VFP // +//=============// + +// The Writes that take ResourceCycles=[2] are MVE instructions, the others VFP. + +let SingleIssue = 1, Latency = 1 in { + def M55WriteLSE2 : SchedWriteRes<[M55UnitLoadStore]>; + def M55WriteIntE2 : SchedWriteRes<[M55UnitVecALU]>; + def M55WriteFloatE2 : SchedWriteRes<[M55UnitVecFPALU]>; + def M55WriteSysE2 : SchedWriteRes<[M55UnitVecSys]>; + + def M55Write2LSE2 : SchedWriteRes<[M55UnitLoadStore]> { let ResourceCycles=[2]; } + def M55Write2IntE2 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; } + def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; } + def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]> { let ResourceCycles=[2]; } +} + +let SingleIssue = 1, Latency = 2 in { + def M55WriteLSE3 : SchedWriteRes<[M55UnitLoadStore]>; + def M55WriteIntE3 : SchedWriteRes<[M55UnitVecALU]>; + def M55WriteFloatE3 : SchedWriteRes<[M55UnitVecFPALU]>; + + def M55Write2LSE3 : SchedWriteRes<[M55UnitLoadStore]> { let ResourceCycles=[2]; } + def M55Write2IntE3 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; } + def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; } +} + +let SingleIssue = 1, Latency = 3 in { + def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; } + + // Same as M55Write2IntE3/M55Write2FloatE3 above, but longer latency and no forwarding into stores + def M55Write2IntE4NoFwd : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; } + def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; 
} +} +let SingleIssue = 1, Latency = 4 in { + def M55Write2IntE3Plus2 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; } + def M55WriteFloatE3Plus2 : SchedWriteRes<[M55UnitVecFPALU]>; +} +let SingleIssue = 1, Latency = 9 in { + def M55WriteFloatE3Plus7 : SchedWriteRes<[M55UnitVecFPALU]>; +} +let SingleIssue = 1, Latency = 15 in { + def M55WriteFloatE3Plus13 : SchedWriteRes<[M55UnitVecFPALU]>; +} +let SingleIssue = 1, Latency = 16 in { + def M55WriteFloatE3Plus14 : SchedWriteRes<[M55UnitVecFPALU]>; +} +let SingleIssue = 1, Latency = 21 in { + def M55WriteFloatE3Plus19 : SchedWriteRes<[M55UnitVecFPALU]>; +} +// VMUL (Double precision) + VADD (Double precision) +let SingleIssue = 1, Latency = 24 in { + def M55WriteFloatE3Plus22 : SchedWriteRes<[M55UnitVecFPALU]>; +} +let SingleIssue = 1, Latency = 30 in { + def M55WriteFloatE3Plus28 : SchedWriteRes<[M55UnitVecFPALU]>; +} +let SingleIssue = 1, Latency = 36 in { + def M55WriteFloatE3Plus34 : SchedWriteRes<[M55UnitVecFPALU]>; +} + +def M55Read0 : SchedReadAdvance<0>; +def M55Read1 : SchedReadAdvance<1, [M55Write2LSE3, M55Write2IntE3, M55Write2FloatE3]>; +def M55GatherQRead : SchedReadAdvance<-4>; + +// MVE instructions + +// Loads and Stores of different kinds + +// Normal loads +def : InstRW<[M55Write2LSE2], (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)$")>; +// Pre/post inc loads +def : InstRW<[M55WriteLat1, M55Write2LSE2], (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)_(post|pre)$")>; +// Gather loads +def : InstRW<[M55Write2LSE3, M55Read0, M55GatherQRead], (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_rq")>; +def : InstRW<[M55Write2LSE3, M55GatherQRead], (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_qi$")>; +def : InstRW<[M55WriteLat1, M55Write2LSE3, M55GatherQRead], (instregex "MVE_VLDR(W|D)U(32|64)_qi_pre$")>; +// Interleaving loads +def : InstRW<[M55Write2LSE2], (instregex "MVE_VLD[24][0-3]_(8|16|32)$")>; +// Interleaving loads with wb +def : InstRW<[M55Write2LSE2, M55WriteLat1], (instregex 
"MVE_VLD[24][0-3]_(8|16|32)_wb$")>; + +// Normal stores +def : InstRW<[M55Write2LSE2, M55Read1], (instregex "MVE_VSTR(B|H|W)U?(8|16|32)$")>; +// Pre/post inc stores +def : InstRW<[M55Write2LSE2, M55Read1], (instregex "MVE_VSTR(B|H|W)U?(8|16|32)_(post|pre)$")>; +// Scatter stores +def : InstRW<[M55Write2LSE2, M55Read0, M55Read0, M55GatherQRead], (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_rq")>; +def : InstRW<[M55Write2LSE2, M55Read0, M55GatherQRead], (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_qi")>; +// Interleaving stores +def : InstRW<[M55Write2LSE2], (instregex "MVE_VST(2|4)")>; + +// Integer pipe operations + +def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_VABAV")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VABD(u|s)")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VABS(u|s)")>; +def : InstRW<[M55Write2IntE3], (instregex "MVE_VADC")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VADD(_qr_)?i")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VAND")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VBIC")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VBRSR")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VCADDi")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VCLS")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VCLZ")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_V(D|I)?W?DUP")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VEOR")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VHADD")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VHCADD")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VHSUB")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_V(MAX|MIN)A?(s|u)")>; +def : InstRW<[M55Write2IntE3], (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>; +def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_V(MAX|MIN)A?V(s|u)16")>; +def : InstRW<[M55Write2IntE3Plus2], (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>; +def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VMOVN")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VMOVL")>; +def : 
InstRW<[M55Write2IntE3], (instregex "MVE_VMULL[BT]p")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VMVN")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VNEG(u|s)")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VORN")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VORR")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VPSEL")>; +def : InstRW<[M55Write2IntE2], (instregex "MQPRCopy")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VQABS")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VQADD")>; +def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQMOV")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VQNEG")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VSHL")>; +def : InstRW<[M55Write2IntE3], (instregex "MVE_V[QR]SHL")>; +def : InstRW<[M55Write2IntE3], (instregex "MVE_VQRSHL")>; +def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQ?R?SHRU?N")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VSHR_")>; +def : InstRW<[M55Write2IntE3], (instregex "MVE_VRSHR_")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VQSUB")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VREV")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VRHADD")>; +def : InstRW<[M55Write2IntE3], (instregex "MVE_VSBC")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VSLI")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VSRI")>; +def : InstRW<[M55Write2IntE2], (instregex "MVE_VSUB(_qr_)?i")>; + +// FP/Mul pipe operations. 
+ +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VABDf")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VABSf")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VADDf")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VADD_qr_f")>; +def : InstRW<[M55Write2FloatE3, M55WriteLat1], (instregex "MVE_VADDLV")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VADDV")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCADDf")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCMLA")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCMUL")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCMP(i|s|u)", "MVE_VPTv(4|8|16)(i|s|u)")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCMPf", "MVE_VPTv(4|8)f")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf16(u|s)16")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf32(u|s)32")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVT(u|s)16f16")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVT(u|s)32f32")>; +def : InstRW<[M55Write2FloatE4NoFwd], (instregex "MVE_VCVTf16f32")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf32f16")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VFM(A|S)")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_V(MIN|MAX)NM")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VMOV_from_lane")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VMOV_rr_q")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VMOVi")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VMUL(_qr_)?[if]")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?R?D?MULH")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?D?MULL[TB]?[su]")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQDMULL_qr_")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?R?D?ML(A|S)[^L]")>; +def : InstRW<[M55Write2FloatE3, M55WriteLat1], (instregex "MVE_VR?ML(A|S)L")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VNEGf")>; +def : InstRW<[M55Write2FloatE3], (instregex 
"MVE_VRINTf")>; +def : InstRW<[M55Write2FloatE2], (instregex "MVE_VSUBf")>; +def : InstRW<[M55Write2FloatE3], (instregex "MVE_VSUB_qr_f")>; + +// Some VMOV's can go down either pipeline. +def : InstRW<[M55Write2IntFPE2], (instregex "MVE_VMOV_to_lane", "MVE_VMOV_q_rr")>; + +def : InstRW<[M55WriteSysE2], (instregex "MVE_VCTP")>; +def : InstRW<[M55WriteSysE2], (instregex "MVE_VPNOT")>; +def : InstRW<[M55WriteSysE2], (instregex "MVE_VPST")>; + + +// VFP instructions + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : ReadAdvance; +def : ReadAdvance; + +def : InstRW<[M55WriteLSE3], (instregex "VLD")>; +def : InstRW<[M55WriteLSE2], (instregex "VST")>; +def : InstRW<[M55WriteLSE3], (instregex "VLLD", "VLST")>; + +def : InstRW<[M55WriteFloatE3], (instregex "VABS(H|S|D)")>; +def : InstRW<[M55WriteFloatE3], (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S|D)")>; +def : InstRW<[M55WriteFloatE3], (instregex "VCVT(B|T)(DH|HD)")>; +def : InstRW<[M55WriteFloatE2], (instregex "VCMPZ?(E|H|S|D)")>; +def : InstRW<[M55WriteFloatE3Plus7], (instregex "VDIVH")>; +def : InstRW<[M55WriteFloatE3], (instregex "VFN?M(A|S)(H|S)")>; // VFMA +def : InstRW<[M55WriteFloatE3Plus22], (instregex "VFN?M(A|S)D")>; // VFMA +def : InstRW<[M55WriteFloatE3], (instregex "VFP_V(MAX|MIN)NM")>; +def : InstRW<[M55WriteFloatE3], (instregex "VINSH$", "VMOVH$", "VMOVHR$", "VMOVSR$", "VMOVDRR$")>; // VINS, VMOVX, to-FP reg movs +def : InstRW<[M55WriteFloatE2], (instregex "VMOVD$", "VMOVS$", "VMOVR")>; // Other VMOV's +def : InstRW<[M55WriteFloatE2], (instregex "FCONSTH", "FCONSTS", "FCONSTD")>; +def : InstRW<[M55WriteFloatE2], (instregex "VGETLNi32", "VSETLNi32")>; +def : InstRW<[M55WriteFloatE2], (instregex "VMSR", "VMRS")>; +def : InstRW<[M55WriteFloatE3Plus2], (instregex "VN?ML(A|S)H")>; // VMLA +def : InstRW<[M55WriteFloatE3], (instregex 
"VNEG(H|S|D)")>; +def : InstRW<[M55WriteFloatE3], (instregex "VRINT(A|M|N|P|R|X|Z)(H|S|D)")>; +def : InstRW<[M55WriteFloatE3], (instregex "VSEL..(H|S|D)")>; +def : InstRW<[M55WriteFloatE3Plus7], (instregex "VSQRTH")>; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll @@ -191,41 +191,43 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: sub sp, #64 +; CHECK-NEXT: sub sp, #80 ; CHECK-NEXT: ldrsh.w r12, [r2, #2] ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: blt.w .LBB1_6 ; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.lr.ph ; CHECK-NEXT: ldrsh.w r2, [r2] ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt.w .LBB1_6 +; CHECK-NEXT: blt .LBB1_6 ; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader -; CHECK-NEXT: ldr r7, [sp, #152] -; CHECK-NEXT: movs r4, #252 -; CHECK-NEXT: lsls r6, r3, #3 -; CHECK-NEXT: and.w r4, r4, r3, lsr #3 -; CHECK-NEXT: uxtb r6, r6 +; CHECK-NEXT: ldr r7, [sp, #168] ; CHECK-NEXT: movs r5, #120 -; CHECK-NEXT: mul lr, r4, r7 +; CHECK-NEXT: lsls r6, r3, #3 +; CHECK-NEXT: movs r4, #252 ; CHECK-NEXT: and.w r5, r5, r3, lsr #9 +; CHECK-NEXT: uxtb r6, r6 +; CHECK-NEXT: and.w r3, r4, r3, lsr #3 ; CHECK-NEXT: muls r6, r7, r6 -; CHECK-NEXT: vmov.i16 q0, #0x78 -; CHECK-NEXT: rsb.w r3, r7, #256 +; CHECK-NEXT: mul lr, r3, r7 +; CHECK-NEXT: vdup.16 q0, r6 +; CHECK-NEXT: vstrw.32 q0, [sp, #64] @ 16-byte Spill +; CHECK-NEXT: vdup.16 q0, lr ; CHECK-NEXT: muls r5, r7, r5 -; CHECK-NEXT: lsls r7, r1, #1 ; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vdup.16 q4, r6 +; CHECK-NEXT: vmov.i16 q0, #0xfc ; CHECK-NEXT: mov.w r6, 
#2016 -; CHECK-NEXT: vdup.16 q0, lr -; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: vmov.i16 q2, #0xf8 -; CHECK-NEXT: vmov.i16 q5, #0xfc ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vdup.16 q0, r5 -; CHECK-NEXT: vdup.16 q6, r6 -; CHECK-NEXT: vmov.i16 q3, #0xf800 +; CHECK-NEXT: rsb.w r3, r7, #256 +; CHECK-NEXT: lsls r7, r1, #1 ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q3, [sp] @ 16-byte Spill +; CHECK-NEXT: vdup.16 q0, r6 +; CHECK-NEXT: vmov.i16 q2, #0xf8 +; CHECK-NEXT: vmov.i16 q5, #0x78 +; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.i16 q6, #0xf800 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB1_3: @ %vector.ph ; CHECK-NEXT: @ =>This Loop Header: Depth=1 @@ -237,48 +239,39 @@ ; CHECK-NEXT: @ Parent Loop BB1_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrh.u16 q0, [r5] -; CHECK-NEXT: vmov.f64 d6, d4 -; CHECK-NEXT: vmov.f64 d7, d5 ; CHECK-NEXT: vshl.i16 q1, q0, #3 +; CHECK-NEXT: vldrw.u32 q4, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vand q1, q1, q2 -; CHECK-NEXT: vmov q2, q4 -; CHECK-NEXT: vmla.i16 q2, q1, r3 -; CHECK-NEXT: vshr.u16 q1, q0, #3 -; CHECK-NEXT: vand q1, q1, q5 -; CHECK-NEXT: vmov.f64 d14, d10 -; CHECK-NEXT: vmov.f64 d15, d11 -; CHECK-NEXT: vmov.f64 d10, d8 -; CHECK-NEXT: vmov.f64 d11, d9 -; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vshr.u16 q0, q0, #9 ; CHECK-NEXT: vmla.i16 q4, q1, r3 -; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vmov.f64 d6, d4 +; CHECK-NEXT: vmov.f64 d7, d5 +; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vshr.u16 q2, q0, #9 +; CHECK-NEXT: vshr.u16 q0, q0, #3 ; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vmla.i16 q1, q0, r3 -; CHECK-NEXT: vshr.u16 q0, q2, #11 -; CHECK-NEXT: 
vshr.u16 q2, q4, #5 -; CHECK-NEXT: vand q2, q2, q6 -; CHECK-NEXT: vorr q0, q2, q0 -; CHECK-NEXT: vmov.f64 d4, d6 -; CHECK-NEXT: vmov.f64 d5, d7 -; CHECK-NEXT: vldrw.u32 q3, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.f64 d8, d10 -; CHECK-NEXT: vmov.f64 d9, d11 -; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vand q2, q2, q5 +; CHECK-NEXT: vshr.u16 q0, q4, #11 +; CHECK-NEXT: vldrw.u32 q4, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vshr.u16 q1, q1, #5 +; CHECK-NEXT: vmla.i16 q4, q2, r3 +; CHECK-NEXT: vand q1, q1, q7 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: vand q1, q4, q6 ; CHECK-NEXT: vorr q0, q0, q1 -; CHECK-NEXT: vmov.f64 d10, d14 -; CHECK-NEXT: vmov.f64 d11, d15 ; CHECK-NEXT: vstrh.16 q0, [r5], #16 +; CHECK-NEXT: vmov.f64 d4, d6 +; CHECK-NEXT: vmov.f64 d5, d7 ; CHECK-NEXT: letp lr, .LBB1_4 ; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us ; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1 ; CHECK-NEXT: adds r4, #1 -; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: add r0, r7 +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bne .LBB1_3 ; CHECK-NEXT: .LBB1_6: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #64 +; CHECK-NEXT: add sp, #80 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll --- a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll +++ b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll @@ -7,15 +7,15 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: mov.w lr, #500 -; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB0_1: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r2, [r0], #4 -; CHECK-NEXT: add r1, r2 +; CHECK-NEXT: ldr r2, [r1], #4 +; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup -; CHECK-NEXT: mov 
r0, r1 ; CHECK-NEXT: pop {r7, pc} entry: br label %for.body @@ -43,8 +43,8 @@ ; CHECK-NEXT: blt .LBB1_4 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph ; CHECK-NEXT: mov lr, r2 -; CHECK-NEXT: dls lr, r2 ; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: dls lr, r2 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll @@ -4,69 +4,64 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32 noundef %numSamples, ptr nocapture noundef writeonly %realResult, ptr nocapture noundef writeonly %imagResult) { ; CHECK-LABEL: arm_cmplx_dot_prod_q15: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: ldr.w r12, [sp, #56] +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: ldr.w r12, [sp, #24] ; CHECK-NEXT: cmp r2, #16 ; CHECK-NEXT: blo .LBB0_5 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader -; CHECK-NEXT: lsrs r7, r2, #3 ; CHECK-NEXT: movs r6, #2 +; CHECK-NEXT: lsrs r7, r2, #3 ; CHECK-NEXT: rsb r6, r6, r2, lsr #3 ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: cmp r7, #2 ; CHECK-NEXT: csel r7, r6, r5, hs ; CHECK-NEXT: add.w lr, r7, #1 -; CHECK-NEXT: vldrh.u16 q4, [r0], #32 -; CHECK-NEXT: vldrh.u16 q5, [r1], #32 ; CHECK-NEXT: mov r4, r5 +; CHECK-NEXT: vldrh.u16 q0, [r0], #32 ; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: mov r8, r5 +; CHECK-NEXT: vldrh.u16 q1, [r1], #32 +; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1 ; CHECK-NEXT: vldrh.u16 q2, [r0, #-16] -; CHECK-NEXT: mov r6, r5 -; CHECK-NEXT: sub.w lr, lr, #1 +; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1 ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16] -; 
CHECK-NEXT: vldrh.u16 q1, [r1], #32 -; CHECK-NEXT: vldrh.u16 q0, [r0], #32 -; CHECK-NEXT: vmlsldava.s16 r4, r7, q4, q5 +; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3 +; CHECK-NEXT: vldrh.u16 q0, [r1], #32 +; CHECK-NEXT: sub.w lr, lr, #1 ; CHECK-NEXT: cmp.w lr, #0 -; CHECK-NEXT: vmlaldavax.s16 r6, r5, q4, q5 +; CHECK-NEXT: vldrh.u16 q1, [r0], #32 ; CHECK-NEXT: beq .LBB0_3 ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB0_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q3 -; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3 -; CHECK-NEXT: vldrh.u16 q2, [r0, #-16] -; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q1 -; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1 -; CHECK-NEXT: vldrh.u16 q0, [r0], #32 +; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3 ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16] -; CHECK-NEXT: vldrh.u16 q1, [r1], #32 +; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0 +; CHECK-NEXT: vldrh.u16 q2, [r0, #-16] +; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0 +; CHECK-NEXT: vldrh.u16 q1, [r0], #32 +; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3 +; CHECK-NEXT: vldrh.u16 q0, [r1], #32 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: mov.w lr, #14 -; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3 -; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q3 -; CHECK-NEXT: and.w r2, lr, r2, lsl #1 -; CHECK-NEXT: vmlaldavax.s16 r6, r5, q0, q1 +; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3 +; CHECK-NEXT: movs r6, #14 +; CHECK-NEXT: and.w r2, r6, r2, lsl #1 +; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0 ; CHECK-NEXT: vldrh.u16 q2, [r0, #-16] -; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1 +; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0 ; CHECK-NEXT: vldrh.u16 q0, [r1, #-16] +; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q0 ; CHECK-NEXT: vctp.16 r2 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrht.u16 q1, [r0] -; CHECK-NEXT: vldrht.u16 q3, [r1] -; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q0 ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlsldavat.s16 r4, r7, 
q1, q3 +; CHECK-NEXT: vldrht.u16 q1, [r0] ; CHECK-NEXT: cmp r2, #9 -; CHECK-NEXT: vpst -; CHECK-NEXT: vmlaldavaxt.s16 r6, r5, q1, q3 +; CHECK-NEXT: vpsttt +; CHECK-NEXT: vldrht.u16 q0, [r1] +; CHECK-NEXT: vmlsldavat.s16 r4, r7, q1, q0 +; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q1, q0 ; CHECK-NEXT: blo .LBB0_10 ; CHECK-NEXT: @ %bb.4: @ %do.body.1 ; CHECK-NEXT: subs r2, #8 @@ -75,7 +70,7 @@ ; CHECK-NEXT: vldrht.u16 q0, [r0, #16] ; CHECK-NEXT: vldrht.u16 q1, [r1, #16] ; CHECK-NEXT: vmlsldavat.s16 r4, r7, q0, q1 -; CHECK-NEXT: vmlaldavaxt.s16 r6, r5, q0, q1 +; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q0, q1 ; CHECK-NEXT: b .LBB0_10 ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB0_5: @ %if.else @@ -96,22 +91,20 @@ ; CHECK-NEXT: vmlaldavax.s16 r4, r5, q0, q1 ; CHECK-NEXT: letp lr, .LBB0_7 ; CHECK-NEXT: @ %bb.8: @ %if.end.loopexit177 -; CHECK-NEXT: mov r6, r4 +; CHECK-NEXT: mov r8, r4 ; CHECK-NEXT: mov r4, r2 ; CHECK-NEXT: b .LBB0_10 ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB0_9: ; CHECK-NEXT: mov r7, r4 -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: mov r5, r4 ; CHECK-NEXT: .LBB0_10: @ %if.end ; CHECK-NEXT: asrl r4, r7, #6 -; CHECK-NEXT: asrl r6, r5, #6 +; CHECK-NEXT: asrl r8, r5, #6 ; CHECK-NEXT: str r4, [r3] -; CHECK-NEXT: str.w r6, [r12] -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: str.w r8, [r12] +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %cmp = icmp ugt i32 %numSamples, 15 br i1 %cmp, label %while.body.preheader, label %if.else diff --git a/llvm/test/tools/llvm-mca/ARM/m55-fp.s b/llvm/test/tools/llvm-mca/ARM/m55-fp.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-fp.s @@ -0,0 +1,575 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -instruction-tables < %s | FileCheck %s + +vabs.f16 s0, s2 +vabs.f32 s0, s2 +vabs.f64 d0, d2 +vadd.f16 s0, s2, 
s1 +vadd.f32 s0, s2, s1 +vadd.f64 d0, d2, d1 +vcmp.f16 s1, s2 +vcmp.f32 s1, s2 +vcmp.f64 d1, d2 +vcmp.f16 s1, #0.0 +vcmp.f32 s1, #0.0 +vcmp.f64 d1, #0.0 +vcmpe.f16 s1, s2 +vcmpe.f32 s1, s2 +vcmpe.f64 d1, d2 +vcmpe.f16 s1, #0.0 +vcmpe.f32 s1, #0.0 +vcmpe.f64 d1, #0.0 +vcvt.f32.f64 s1, d2 +vcvt.f64.f32 d1, s1 +vcvt.f16.u16 s1, s2, #8 +vcvt.f16.s16 s1, s2, #8 +vcvt.f16.u32 s1, s2, #8 +vcvt.f16.s32 s1, s2, #8 +vcvt.u16.f16 s1, s2, #8 +vcvt.s16.f16 s1, s2, #8 +vcvt.u32.f16 s1, s2, #8 +vcvt.s32.f16 s1, s2, #8 +vcvt.f32.u16 s1, s2, #8 +vcvt.f32.s16 s1, s2, #8 +vcvt.f32.u32 s1, s2, #8 +vcvt.f32.s32 s1, s2, #8 +vcvt.u16.f32 s1, s2, #8 +vcvt.s16.f32 s1, s2, #8 +vcvt.u32.f32 s1, s2, #8 +vcvt.s32.f32 s1, s2, #8 +vcvt.f64.u16 d1, d2, #8 +vcvt.f64.s16 d1, d2, #8 +vcvt.f64.u32 d1, d2, #8 +vcvt.f64.s32 d1, d2, #8 +vcvt.u16.f64 d1, d2, #8 +vcvt.s16.f64 d1, d2, #8 +vcvt.u32.f64 d1, d2, #8 +vcvt.s32.f64 d1, d2, #8 +vcvt.u32.f16 s1, s2 +vcvt.s32.f16 s1, s2 +vcvt.u32.f32 s1, s2 +vcvt.s32.f32 s1, s2 +vcvt.u32.f64 s1, d2 +vcvt.s32.f64 s1, d2 +vcvt.f16.u32 s1, s2 +vcvt.f16.s32 s1, s2 +vcvt.f32.u32 s1, s2 +vcvt.f32.s32 s1, s2 +vcvt.f64.u32 d1, s2 +vcvt.f64.s32 d1, s2 +vcvta.u32.f16 s1, s2 +vcvta.s32.f16 s1, s2 +vcvta.u32.f32 s1, s2 +vcvta.s32.f32 s1, s2 +vcvta.u32.f64 s1, d2 +vcvta.s32.f64 s1, d2 +vcvtm.u32.f16 s1, s2 +vcvtm.s32.f16 s1, s2 +vcvtm.u32.f32 s1, s2 +vcvtm.s32.f32 s1, s2 +vcvtm.u32.f64 s1, d2 +vcvtm.s32.f64 s1, d2 +vcvtn.u32.f16 s1, s2 +vcvtn.s32.f16 s1, s2 +vcvtn.u32.f32 s1, s2 +vcvtn.s32.f32 s1, s2 +vcvtn.u32.f64 s1, d2 +vcvtn.s32.f64 s1, d2 +vcvtp.u32.f16 s1, s2 +vcvtp.s32.f16 s1, s2 +vcvtp.u32.f32 s1, s2 +vcvtp.s32.f32 s1, s2 +vcvtp.u32.f64 s1, d2 +vcvtp.s32.f64 s1, d2 +vcvtb.f16.f32 s1, s2 +vcvtb.f16.f64 s1, d2 +vcvtb.f32.f16 s1, s2 +vcvtb.f64.f16 d1, s2 +vcvtr.u32.f16 s1, s2 +vcvtr.s32.f16 s1, s2 +vcvtr.u32.f32 s1, s2 +vcvtr.s32.f32 s1, s2 +vcvtr.u32.f64 s1, d2 +vcvtr.s32.f64 s1, d2 +vcvtt.f16.f32 s1, s2 +vcvtt.f16.f64 s1, d2 +vcvtt.f32.f16 s1, s2 +vcvtt.f64.f16 d1, s2 
+vdiv.f16 s0, s2, s1 +vdiv.f32 s0, s2, s1 +vdiv.f64 d0, d2, d1 +vfma.f16 s0, s2, s1 +vfma.f32 s0, s2, s1 +vfma.f64 d0, d2, d1 +vfms.f16 s0, s2, s1 +vfms.f32 s0, s2, s1 +vfms.f64 d0, d2, d1 +vfnma.f16 s0, s2, s1 +vfnma.f32 s0, s2, s1 +vfnma.f64 d0, d2, d1 +vfnms.f16 s0, s2, s1 +vfnms.f32 s0, s2, s1 +vfnms.f64 d0, d2, d1 +vins.f16 s0, s1 +vmaxnm.f16 s0, s2, s1 +vmaxnm.f32 s0, s2, s1 +vmaxnm.f64 d0, d2, d1 +vminnm.f16 s0, s2, s1 +vminnm.f32 s0, s2, s1 +vminnm.f64 d0, d2, d1 +vmla.f16 s0, s2, s1 +vmla.f32 s0, s2, s1 +vmla.f64 d0, d2, d1 +vmls.f16 s0, s2, s1 +vmls.f32 s0, s2, s1 +vmls.f64 d0, d2, d1 +vmov.f16 s0, r1 +vmov.f16 r0, s1 +vmov.f32 s0, r1 +vmov.f32 r0, s1 +vmov.f64 d0, r1, r2 +vmov.f64 r0, r1, d1 +vmov s0, s1, r0, r1 +vmov r0, r1, s0, s1 +vmov.f16 s0, #1.0 +vmov.f32 s0, #1.0 +vmov.f64 d0, #1.0 +vmov.f32 s0, s1 +vmov.f64 d0, d1 +vmovx.f16 s0, s1 +vmul.f16 s0, s2, s1 +vmul.f32 s0, s2, s1 +vmul.f64 d0, d2, d1 +vneg.f16 s0, s2 +vneg.f32 s0, s2 +vneg.f64 d0, d2 +vnmla.f16 s0, s2, s1 +vnmla.f32 s0, s2, s1 +vnmla.f64 d0, d2, d1 +vnmls.f16 s0, s2, s1 +vnmls.f32 s0, s2, s1 +vnmls.f64 d0, d2, d1 +vnmul.f16 s0, s2, s1 +vnmul.f32 s0, s2, s1 +vnmul.f64 d0, d2, d1 +vrinta.f16 s0, s2 +vrinta.f32.f32 s0, s2 +vrinta.f64.f64 d0, d2 +vrintm.f16 s0, s2 +vrintm.f32.f32 s0, s2 +vrintm.f64.f64 d0, d2 +vrintn.f16 s0, s2 +vrintn.f32.f32 s0, s2 +vrintn.f64.f64 d0, d2 +vrintp.f16 s0, s2 +vrintp.f32.f32 s0, s2 +vrintp.f64.f64 d0, d2 +vrintr.f16.f16 s0, s2 +vrintr.f32.f32 s0, s2 +vrintr.f64.f64 d0, d2 +vrintz.f16.f16 s0, s2 +vrintz.f32.f32 s0, s2 +vrintz.f64.f64 d0, d2 +vrintx.f16.f16 s0, s2 +vrintx.f32.f32 s0, s2 +vrintx.f64.f64 d0, d2 +vseleq.f16 s0, s2, s1 +vseleq.f32 s0, s2, s1 +vseleq.f64 d0, d2, d1 +vsqrt.f16 s0, s2 +vsqrt.f32 s0, s2 +vsqrt.f64 d0, d2 +vsub.f16 s0, s2, s1 +vsub.f32 s0, s2, s1 +vsub.f64 d0, d2, d1 + +#vldr pc +#vldr [rn + value] +#vstr pc +#vstr [rn + value] + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: 
RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 2 1.00 vabs.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vabs.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vabs.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vadd.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vadd.f32 s0, s2, s1 +# CHECK-NEXT: 1 15 1.00 vadd.f64 d0, d2, d1 +# CHECK-NEXT: 1 1 1.00 vcmp.f16 s1, s2 +# CHECK-NEXT: 1 1 1.00 vcmp.f32 s1, s2 +# CHECK-NEXT: 1 1 1.00 vcmp.f64 d1, d2 +# CHECK-NEXT: 1 1 1.00 vcmp.f16 s1, #0 +# CHECK-NEXT: 1 1 1.00 vcmp.f32 s1, #0 +# CHECK-NEXT: 1 1 1.00 vcmp.f64 d1, #0 +# CHECK-NEXT: 1 1 1.00 vcmpe.f16 s1, s2 +# CHECK-NEXT: 1 1 1.00 vcmpe.f32 s1, s2 +# CHECK-NEXT: 1 1 1.00 vcmpe.f64 d1, d2 +# CHECK-NEXT: 1 1 1.00 vcmpe.f16 s1, #0 +# CHECK-NEXT: 1 1 1.00 vcmpe.f32 s1, #0 +# CHECK-NEXT: 1 1 1.00 vcmpe.f64 d1, #0 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.f32 d1, s1 +# CHECK-NEXT: 1 2 1.00 vcvt.f16.u16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f16.s16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f16.u32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f16.s32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u16.f16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.s16.f16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u32.f16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.s32.f16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.u16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.s16 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.u32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.s32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u16.f32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.s16.f32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u32.f32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.s32.f32 s1, s1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.u16 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.s16 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.u32 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.s32 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u16.f64 d1, d1, #8 +# 
CHECK-NEXT: 1 2 1.00 vcvt.s16.f64 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u32.f64 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.s32.f64 d1, d1, #8 +# CHECK-NEXT: 1 2 1.00 vcvt.u32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.s32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.u32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.s32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.u32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvt.s32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvt.f16.u32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.f16.s32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.u32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.f32.s32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.u32 d1, s2 +# CHECK-NEXT: 1 2 1.00 vcvt.f64.s32 d1, s2 +# CHECK-NEXT: 1 2 1.00 vcvta.u32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvta.s32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvta.u32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvta.s32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvta.u32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvta.s32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtm.u32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtm.s32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtm.u32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtm.s32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtm.u32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtm.s32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtn.u32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtn.s32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtn.u32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtn.s32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtn.u32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtn.s32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtp.u32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtp.s32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtp.u32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtp.s32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtp.u32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtp.s32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtb.f16.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtb.f16.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtb.f32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtb.f64.f16 d1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtr.u32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 
vcvtr.s32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtr.u32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtr.s32.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtr.u32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtr.s32.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtt.f16.f32 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtt.f16.f64 s1, d2 +# CHECK-NEXT: 1 2 1.00 vcvtt.f32.f16 s1, s2 +# CHECK-NEXT: 1 2 1.00 vcvtt.f64.f16 d1, s2 +# CHECK-NEXT: 1 9 1.00 vdiv.f16 s0, s2, s1 +# CHECK-NEXT: 1 16 1.00 vdiv.f32 s0, s2, s1 +# CHECK-NEXT: 1 30 1.00 vdiv.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vfma.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vfma.f32 s0, s2, s1 +# CHECK-NEXT: 1 24 1.00 vfma.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vfms.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vfms.f32 s0, s2, s1 +# CHECK-NEXT: 1 24 1.00 vfms.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vfnma.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vfnma.f32 s0, s2, s1 +# CHECK-NEXT: 1 24 1.00 vfnma.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vfnms.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vfnms.f32 s0, s2, s1 +# CHECK-NEXT: 1 24 1.00 vfnms.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vins.f16 s0, s1 +# CHECK-NEXT: 1 2 1.00 vmaxnm.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vmaxnm.f32 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vmaxnm.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vminnm.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vminnm.f32 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vminnm.f64 d0, d2, d1 +# CHECK-NEXT: 1 4 1.00 vmla.f16 s0, s2, s1 +# CHECK-NEXT: 1 4 1.00 vmla.f32 s0, s2, s1 +# CHECK-NEXT: 1 36 1.00 vmla.f64 d0, d2, d1 +# CHECK-NEXT: 1 4 1.00 vmls.f16 s0, s2, s1 +# CHECK-NEXT: 1 4 1.00 vmls.f32 s0, s2, s1 +# CHECK-NEXT: 1 36 1.00 vmls.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vmov.f16 s0, r1 +# CHECK-NEXT: 1 1 1.00 vmov.f16 r0, s1 +# CHECK-NEXT: 1 2 1.00 vmov s0, r1 +# CHECK-NEXT: 1 1 1.00 vmov r0, s1 +# CHECK-NEXT: 1 2 1.00 vmov d0, r1, r2 +# CHECK-NEXT: 1 1 1.00 vmov r0, r1, d1 +# CHECK-NEXT: 1 2 1.00 vmov s0, s1, r0, r1 +# CHECK-NEXT: 1 1 1.00 vmov r0, r1, s0, s1 +# CHECK-NEXT: 1 1 1.00 vmov.f16 s0, 
#1.000000e+00 +# CHECK-NEXT: 1 1 1.00 vmov.f32 s0, #1.000000e+00 +# CHECK-NEXT: 1 1 1.00 vmov.f64 d0, #1.000000e+00 +# CHECK-NEXT: 1 1 1.00 vmov.f32 s0, s1 +# CHECK-NEXT: 1 1 1.00 vmov.f64 d0, d1 +# CHECK-NEXT: 1 2 1.00 vmovx.f16 s0, s1 +# CHECK-NEXT: 1 2 1.00 vmul.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vmul.f32 s0, s2, s1 +# CHECK-NEXT: 1 21 1.00 vmul.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vneg.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vneg.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vneg.f64 d0, d2 +# CHECK-NEXT: 1 4 1.00 vnmla.f16 s0, s2, s1 +# CHECK-NEXT: 1 4 1.00 vnmla.f32 s0, s2, s1 +# CHECK-NEXT: 1 36 1.00 vnmla.f64 d0, d2, d1 +# CHECK-NEXT: 1 4 1.00 vnmls.f16 s0, s2, s1 +# CHECK-NEXT: 1 4 1.00 vnmls.f32 s0, s2, s1 +# CHECK-NEXT: 1 36 1.00 vnmls.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vnmul.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vnmul.f32 s0, s2, s1 +# CHECK-NEXT: 1 21 1.00 vnmul.f64 d0, d2, d1 +# CHECK-NEXT: 1 2 1.00 vrinta.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrinta.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrinta.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vrintm.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintm.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintm.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vrintn.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintn.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintn.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vrintp.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintp.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintp.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vrintr.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintr.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintr.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vrintz.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintz.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintz.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vrintx.f16 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintx.f32 s0, s2 +# CHECK-NEXT: 1 2 1.00 vrintx.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vseleq.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vseleq.f32 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vseleq.f64 d0, d2, d1 +# CHECK-NEXT: 1 9 1.00 vsqrt.f16 s0, s2 +# CHECK-NEXT: 1 16 1.00 vsqrt.f32 s0, 
s2 +# CHECK-NEXT: 1 30 1.00 vsqrt.f64 d0, d2 +# CHECK-NEXT: 1 2 1.00 vsub.f16 s0, s2, s1 +# CHECK-NEXT: 1 2 1.00 vsub.f32 s0, s2, s1 +# CHECK-NEXT: 1 15 1.00 vsub.f64 d0, d2, d1 + +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: - - - 181.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: - - - 1.00 - vabs.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vabs.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vabs.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vadd.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vadd.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vadd.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vcmp.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcmp.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcmp.f64 d1, d2 +# CHECK-NEXT: - - - 1.00 - vcmp.f16 s1, #0 +# CHECK-NEXT: - - - 1.00 - vcmp.f32 s1, #0 +# CHECK-NEXT: - - - 1.00 - vcmp.f64 d1, #0 +# CHECK-NEXT: - - - 1.00 - vcmpe.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcmpe.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcmpe.f64 d1, d2 +# CHECK-NEXT: - - - 1.00 - vcmpe.f16 s1, #0 +# CHECK-NEXT: - - - 1.00 - vcmpe.f32 s1, #0 +# CHECK-NEXT: - - - 1.00 - vcmpe.f64 d1, #0 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.f32 d1, s1 +# CHECK-NEXT: - - - 1.00 - vcvt.f16.u16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f16.s16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f16.u32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f16.s32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u16.f16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.s16.f16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u32.f16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.s32.f16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.u16 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.s16 
s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.u32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.s32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u16.f32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.s16.f32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u32.f32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.s32.f32 s1, s1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.u16 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.s16 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.u32 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.s32 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u16.f64 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.s16.f64 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u32.f64 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.s32.f64 d1, d1, #8 +# CHECK-NEXT: - - - 1.00 - vcvt.u32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.s32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.u32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.s32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.u32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvt.s32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvt.f16.u32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.f16.s32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.u32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.f32.s32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.u32 d1, s2 +# CHECK-NEXT: - - - 1.00 - vcvt.f64.s32 d1, s2 +# CHECK-NEXT: - - - 1.00 - vcvta.u32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvta.s32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvta.u32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvta.s32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvta.u32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvta.s32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtm.u32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtm.s32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtm.u32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtm.s32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtm.u32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtm.s32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtn.u32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtn.s32.f16 s1, 
s2 +# CHECK-NEXT: - - - 1.00 - vcvtn.u32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtn.s32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtn.u32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtn.s32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtp.u32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtp.s32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtp.u32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtp.s32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtp.u32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtp.s32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtb.f16.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtb.f16.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtb.f32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtb.f64.f16 d1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtr.u32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtr.s32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtr.u32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtr.s32.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtr.u32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtr.s32.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtt.f16.f32 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtt.f16.f64 s1, d2 +# CHECK-NEXT: - - - 1.00 - vcvtt.f32.f16 s1, s2 +# CHECK-NEXT: - - - 1.00 - vcvtt.f64.f16 d1, s2 +# CHECK-NEXT: - - - 1.00 - vdiv.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vdiv.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vdiv.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vfma.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfma.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfma.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vfms.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfms.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfms.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vfnma.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfnma.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfnma.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vfnms.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfnms.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vfnms.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vins.f16 s0, s1 +# CHECK-NEXT: - - - 1.00 - vmaxnm.f16 s0, s2, s1 
+# CHECK-NEXT: - - - 1.00 - vmaxnm.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmaxnm.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vminnm.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vminnm.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vminnm.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vmla.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmla.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmla.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vmls.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmls.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmls.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vmov.f16 s0, r1 +# CHECK-NEXT: - - - 1.00 - vmov.f16 r0, s1 +# CHECK-NEXT: - - - 1.00 - vmov s0, r1 +# CHECK-NEXT: - - - 1.00 - vmov r0, s1 +# CHECK-NEXT: - - - 1.00 - vmov d0, r1, r2 +# CHECK-NEXT: - - - 1.00 - vmov r0, r1, d1 +# CHECK-NEXT: - - - 1.00 - vmov s0, s1, r0, r1 +# CHECK-NEXT: - - - 1.00 - vmov r0, r1, s0, s1 +# CHECK-NEXT: - - - 1.00 - vmov.f16 s0, #1.000000e+00 +# CHECK-NEXT: - - - 1.00 - vmov.f32 s0, #1.000000e+00 +# CHECK-NEXT: - - - 1.00 - vmov.f64 d0, #1.000000e+00 +# CHECK-NEXT: - - - 1.00 - vmov.f32 s0, s1 +# CHECK-NEXT: - - - 1.00 - vmov.f64 d0, d1 +# CHECK-NEXT: - - - 1.00 - vmovx.f16 s0, s1 +# CHECK-NEXT: - - - 1.00 - vmul.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmul.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vmul.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vneg.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vneg.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vneg.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vnmla.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vnmla.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vnmla.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vnmls.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vnmls.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vnmls.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vnmul.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vnmul.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vnmul.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vrinta.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrinta.f32 s0, s2 +# 
CHECK-NEXT: - - - 1.00 - vrinta.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vrintm.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintm.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintm.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vrintn.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintn.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintn.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vrintp.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintp.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintp.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vrintr.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintr.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintr.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vrintz.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintz.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintz.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vrintx.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintx.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vrintx.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vseleq.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vseleq.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vseleq.f64 d0, d2, d1 +# CHECK-NEXT: - - - 1.00 - vsqrt.f16 s0, s2 +# CHECK-NEXT: - - - 1.00 - vsqrt.f32 s0, s2 +# CHECK-NEXT: - - - 1.00 - vsqrt.f64 d0, d2 +# CHECK-NEXT: - - - 1.00 - vsub.f16 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vsub.f32 s0, s2, s1 +# CHECK-NEXT: - - - 1.00 - vsub.f64 d0, d2, d1 diff --git a/llvm/test/tools/llvm-mca/ARM/m55-int.s b/llvm/test/tools/llvm-mca/ARM/m55-int.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-int.s @@ -0,0 +1,1425 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -mattr=+mve.fp -instruction-tables < %s | FileCheck %s + +adc r0, r1, #0 +adcs r0, r1, #0 +adcs r0, r1 +adc.w r0, r1, r2 +adcs.w r0, r1, r2 +adc.w r0, r1, r2, LSL #1 +adcs.w r0, r1, r2, LSL #1 +add r0, sp, #1 +add sp, #1 +add.w r0, sp, #1 +adds.w r0, sp, #1 +addw r0, sp, #1 +add r0, sp, r0 +add sp, r1 +add.w r0, sp, r1 +adds.w r0, sp, r1 +add.w r0, 
sp, r1, LSL #1 +adds.w r0, sp, r1, LSL #1 +adds r0, r1, #1 +adds r0, #42 +add.w r0, r1, #1 +adds.w r0, r1, #1 +addw r0, r1, #1 +adds r0, r1, r2 +add r0, r1 +add.w r0, r1, r2 +adds.w r0, r1, r2 +add.w r0, r1, r2, LSL #1 +adds.w r0, r1, r2, LSL #1 +adr r0, #-6 +adr r8, #-6 +adr.w r0, #-6 +and r0, r1, #1 +ands r0, r1, #1 +ands r1, r0 +and.w r0, r1, r2 +ands.w r0, r1, r2 +and.w r0, r1, r2, LSL #1 +ands.w r0, r1, r2, LSL #1 +asrs r0, r1, #1 +asr.w r0, r1, #1 +asrs.w r0, r1, #1 +asrs r0, r1 +asr.w r0, r1, r2 +asrs.w r0, r1, r2 +asrl r0, r1, #1 +asrl r0, r1, r2 +bfc r0, #1, #2 +bfi r0, r1, #1, #2 +bic r0, r1, #1 +bics r0, r1, #1 +bics r0, r1 +bic.w r0, r1, r2 +bics.w r0, r1, r2 +bic.w r0, r1, r2, LSL #1 +bics.w r0, r1, r2, LSL #1 +bkpt #1 +clrex +clrm {r1, r2} +clz r0, r1 +cmn r0, #1 +cmn r0, r1 +cmn.w r0, r1 +cmn.w r0, r1, LSL #1 +cmp r0, #1 +cmp.w r0, #1 +cmp r0, r1 +cmp r0, r10 +cmp.w r0, r1 +cmp.w r0, r1, LSL #1 +#cpsdb 1 +#cpsie if +csel r1, r2, r3, eq +csinc r1, r2, r3, eq +csinv r1, r2, r3, eq +csneg r1, r2, r3, eq +#dbg #1 +dmb +dsb +eor r0, r1, #1 +eors r0, r1, #1 +eors r0, r1 +eor.w r0, r1, r2 +eors.w r0, r1, r2 +eor.w r0, r1, r2, LSL #1 +eors.w r0, r1, r2, LSL #1 +isb +lda r0, [r1] +ldab r0, [r1] +ldaex r0, [r1] +ldaexb r0, [r1] +ldaexh r0, [r1] +ldah r0, [r1] +ldm r0!, {r1} +ldm r0, {r1} +ldm.w r0, {r1} +ldm.w r0!, {r1} +ldmdb r0, {r1} +ldmdb r0!, {r1} +ldr r0, [r1, #4] +ldr r0, [sp, #4] +ldr.w r0, [r1, #4] +ldr r0, [r1, #-1] +ldr r0, [r1], #1 +ldr r0, [r1, #1]! +ldr r0, #4 +ldr.w r0, #4 +ldr r0, next +ldr.w r0, next +ldr r0, [r1, r2] +ldr.w r0, [r1, r2] +ldr.w r0, [r1, r2, LSL #1] +ldrb r0, [r1, #1] +ldrb.w r0, [r1, #1] +ldrb r0, [r1, #-1] +ldrb r0, [r1], #1 +ldrb r0, [r1, #1]! +ldrb r0, #4 +ldrb r0, next +ldrb r0, [r1, r2] +ldrb.w r0, [r1, r2] +ldrb.w r0, [r1, r2, LSL #1] +ldrbt r0, [r1, #1] +ldrd r0, r2, [r1] +ldrd r0, r2, [r1, #-4] +ldrd r0, r2, [r1], #4 +ldrd r0, r2, [r1, #4]! 
+ldrd r0, r2, next +ldrex r0, [r1] +ldrex r0, [r1, #4] +ldrexb r0, [r1] +ldrexh r0, [r1] +ldrh r0, [r1, #2] +ldrh.w r0, [r1, #1] +ldrh r0, [r1, #-1] +ldrh r0, [r1], #1 +ldrh r0, [r1, #1]! +ldrh r0, #4 +ldrh r0, next +ldrh r0, [r1, r2] +ldrh.w r0, [r1, r2] +ldrh.w r0, [r1, r2, LSL #1] +ldrht r0, [r1, #1] +ldrsb r0, [r1, #1] +ldrsb r0, [r1, #-1] +ldrsb r0, [r1], #1 +ldrsb r0, [r1, #1]! +ldrsb r0, #4 +ldrsb r0, next +ldrsb r0, [r1, r2] +ldrsb.w r0, [r1, r2] +ldrsb.w r0, [r1, r2, LSL #1] +ldrsbt r0, [r1, #1] +ldrsh r0, [r1, #2] +ldrsh r0, [r1, #-1] +ldrsh r0, [r1], #1 +ldrsh r0, [r1, #1]! +ldrsh r0, #4 +ldrsh r0, next +ldrsh r0, [r1, r2] +ldrsh.w r0, [r1, r2] +ldrsh.w r0, [r1, r2, LSL #1] +ldrsht r0, [r1, #1] +ldrt r0, [r1, #1] +lsls r0, r1, #1 +lsl.w r0, r1, #1 +lsls.w r0, r1, #1 +lsls r0, r1 +lsl.w r0, r1, r2 +lsls.w r0, r1, r2 +lsll r0, r1, #2 +lsll r0, r1, r2 +lsrs r0, r1, #1 +lsr.w r0, r1, #1 +lsrs.w r0, r1, #1 +lsrs r0, r1 +lsr.w r0, r1, r2 +lsrs.w r0, r1, r2 +lsrl r0, r1, #2 +mla r0, r1, r2, r3 +mls r0, r1, r2, r3 +movs r0, #1 +mov.w r0, #1 +movs.w r0, #1 +movw r0, #1 +mov r0, r1 +#movs r0, r1 +mov.w r0, r1 +movs.w r0, r1 +movt r0, #1 +mrs r0, apsr +msr apsr, r0 +muls r1, r2, r1 +mul r0, r1, r2 +mvn r0, #1 +mvns r0, #1 +mvns r0, r1 +mvn.w r0, r1 +mvns.w r0, r1 +mvn.w r0, r1, LSL #1 +mvns.w r0, r1, LSL #1 +nop +orn r0, r1, #1 +orns r0, r1, #1 +orn r0, r1, r2 +orns r0, r1, r2 +orn r0, r1, r2, LSL #1 +orns r0, r1, r2, LSL #1 +orr r0, r1, #1 +orrs r0, r1, #1 +orrs r0, r1 +orr r0, r1, r2 +orrs r0, r1, r2 +orr r0, r1, r2, LSL #1 +orrs r0, r1, r2, LSL #1 +pkhbt r0, r1, r2 +pkhbt r0, r1, r2, LSL #1 +pkhtb r0, r1, r2 +pkhtb r0, r1, r2, ASR #1 +pop { r0 } +pop.w { r0, r1 } +pop.w { r0 } +pssbb +push { r0 } +push.w { r0, r1 } +push.w { r0 } +qadd r0, r1, r2 +qadd16 r0, r1, r2 +qadd8 r0, r1, r2 +qasx r0, r1, r2 +qdadd r0, r1, r2 +qdsub r0, r1, r2 +qsax r0, r1, r2 +qsub r0, r1, r2 +qsub16 r0, r1, r2 +qsub8 r0, r1, r2 +rbit r0, r1 +rev r0, r1 +rev.w r0, r1 +rev16 r0, r1 
+rev16.w r0, r1 +revsh r0, r1 +revsh.w r0, r1 +ror r0, r1, #1 +rors r0, r1, #1 +rors r0, r1 +ror.w r0, r1, r2 +rors.w r0, r1, r2 +rrx r0, r1 +rrxs r0, r1 +rsbs r0, r1, #0 +rsb.w r0, r1, #1 +rsbs.w r0, r1, #1 +rsb r0, r1, r2 +rsbs r0, r1, r2 +rsb r0, r1, r2, LSL #1 +rsbs r0, r1, r2, LSL #1 +sadd16 r0, r1, r2 +sadd8 r0, r1, r2 +sasx r0, r1, r2 +sbc r0, r1, #1 +sbcs r0, r1, #1 +sbcs r0, r1 +sbc r0, r1, r2 +sbcs r0, r1, r2 +sbc r0, r1, r2, LSL #1 +sbcs r0, r1, r2, LSL #1 +sbfx r0, r1, #1, #2 +sdiv r0, r1, r2 +sel r0, r1, r2 +sev +#sg +shadd16 r0, r1, r2 +shadd8 r0, r1, r2 +shasx r0, r1, r2 +shsax r0, r1, r2 +shsub16 r0, r1, r2 +shsub8 r0, r1, r2 +smlabb r0, r1, r2, r3 +smlabt r0, r1, r2, r3 +smlatb r0, r1, r2, r3 +smlatt r0, r1, r2, r3 +smlad r0, r1, r2, r3 +smladx r0, r1, r2, r3 +smlal r0, r1, r2, r3 +smlalbb r0, r1, r2, r3 +smlalbt r0, r1, r2, r3 +smlaltb r0, r1, r2, r3 +smlaltt r0, r1, r2, r3 +smlald r0, r1, r2, r3 +smlaldx r0, r1, r2, r3 +smlawb r0, r1, r2, r3 +smlawt r0, r1, r2, r3 +smlsd r0, r1, r2, r3 +smlsdx r0, r1, r2, r3 +smlsld r0, r1, r2, r3 +smlsldx r0, r1, r2, r3 +smmla r0, r1, r2, r3 +smmlar r0, r1, r2, r3 +smmls r0, r1, r2, r3 +smmlsr r0, r1, r2, r3 +smmul r0, r1, r2 +smmulr r0, r1, r2 +smuad r0, r1, r2 +smuadx r0, r1, r2 +smulbb r0, r1, r2 +smulbt r0, r1, r2 +smultb r0, r1, r2 +smultt r0, r1, r2 +smull r0, r1, r2, r3 +smulwb r0, r1, r2 +smulwt r0, r1, r2 +smusd r0, r1, r2 +smusdx r0, r1, r2 +sqrshr r0, r1 +sqrshrl r0, r1, #48, r2 +sqshl r0, #7 +sqshll r0, r1, #7 +srshr r0, #7 +srshrl r0, r1, #7 +ssat r0, #1, r2 +ssat r0, #1, r2, LSL #1 +ssat16 r0, #1, r1 +ssax r0, r1, r2 +ssbb +ssub16 r0, r1, r2 +ssub8 r0, r1, r2 +stl r0, [r1] +stlb r0, [r1] +stlex r0, r1, [r2] +stlexb r0, r1, [r2] +stlexh r0, r1, [r2] +stlh r0, [r1] +stm r0!, { r1 } +stm.w r0, { r1 } +stm.w r0!, { r1 } +stmdb r0, { r1 } +stmdb r0!, { r1 } +str r0, [ r1 ] +str r0, [ r1, #4 ] +str r0, [ sp, #4 ] +str.w r0, [ r1, #1 ] +str r0, [ r1, #-1 ] +str r0, [ r1 ], #1 +#str r0, [ r1, #1 ]! 
+str r0, [ r1, r2 ] +str.w r0, [ r1, r2 ] +str.w r0, [ r1, r2, LSL #1 ] +strb r0, [ r1 ] +strb r0, [ r1, #1 ] +strb.w r0, [ r1, #1 ] +strb r0, [ r1, #-1 ] +strb r0, [ r1 ], #1 +strb r0, [ r1, #1 ]! +strb r0, [ r1, r2 ] +strb.w r0, [ r1, r2 ] +strb.w r0, [ r1, r2, LSL #1 ] +strbt r0, [ r1, #1 ] +strd r0, r1, [ r2, #4 ] +strd r0, r1, [ r2 ], #4 +strd r0, r1, [ r2, #4 ]! +strex r0, r1, [ r2 ] +strex r0, r1, [ r2, #4 ] +strexb r0, r1, [ r2 ] +strexh r0, r1, [ r2 ] +strh r0, [ r1 ] +strh r0, [ r1, #2 ] +strh.w r0, [ r1, #2 ] +strh r0, [ r1, #-1 ] +strh r0, [ r1 ], #1 +strh r0, [ r1, #1 ]! +strh r0, [ r1, r2 ] +strh.w r0, [ r1, r2 ] +strh.w r0, [ r1, r2, LSL #1 ] +strht r0, [r1, #1 ] +strt r0, [r1, #1 ] +sub sp, sp, #4 +sub.w r0, sp, #1 +subs.w r0, sp, #1 +subw r0, sp, #1 +sub r0, sp, r1 +subs r0, sp, r1 +sub r0, sp, r1, LSL #1 +subs r0, sp, r1, LSL #1 +subs r0, r1, #1 +subs r0, #1 +sub.w r0, r1, #1 +subs.w r0, r1, #1 +subw r0, r1, #1 +subs r0, r1, r2 +sub.w r0, r1, r2 +subs.w r0, r1, r2 +sub.w r0, r1, r2, LSL #1 +subs.w r0, r1, r2, LSL #1 +#svc #1 ; treated as a call +sxtab r0, r1, r2 +sxtab r0, r1, r2, ROR #8 +sxtab16 r0, r1, r2 +sxtab16 r0, r1, r2, ROR #8 +sxtah r0, r1, r2 +sxtah r0, r1, r2, ROR #8 +sxtb r0, r1 +sxtb.w r0, r1 +sxtb.w r0, r1, ROR #8 +sxtb16 r0, r1 +sxtb16 r0, r1, ROR #8 +sxth r0, r1 +sxth.w r0, r1 +sxth.w r0, r1, ROR #8 +tbb [r0, r1] +tbh [r0, r1, LSL #1] +teq r0, #1 +teq r0, r1 +teq r0, r1, LSL #1 +tst r0, #1 +tst r0, r1 +tst.w r0, r1 +tst.w r0, r1, LSL #1 +#tt r0, r1 +#ttt r0, r1 +#tta r0, r1 +#ttat r0, r1 +uadd16 r0, r1, r2 +uadd8 r0, r1, r2 +uasx r0, r1, r2 +ubfx r0, r1, #1, #2 +#udf #1 +udiv r0, r1, r2 +uhadd16 r0, r1, r2 +uhadd8 r0, r1, r2 +uhasx r0, r1, r2 +uhsax r0, r1, r2 +uhsub16 r0, r1, r2 +uhsub8 r0, r1, r2 +umaal r0, r1, r2, r3 +umlal r0, r1, r2, r3 +umull r0, r1, r2, r3 +uqadd16 r0, r1, r2 +uqadd8 r0, r1, r2 +uqasx r0, r1, r2 +uqrshl r0, r1 +uqrshll r0, r1, #48, r2 +uqsax r0, r1, r2 +uqshl r0, #1 +uqshll r0, r1, #1 +uqsub16 r0, r1, r2 
+uqsub8 r0, r1, r2 +urshr r0, #1 +urshrl r0, r1, #1 +usad8 r0, r1, r2 +usada8 r0, r1, r2, r3 +usat r0, #1, r1 +usat r0, #1, r1, LSL #1 +usat16 r0, #1, r1 +usax r0, r1, r2 +usub16 r0, r1, r2 +usub8 r0, r1, r2 +uxtab r0, r1, r2 +uxtab r0, r1, r2, ROR #8 +uxtab16 r0, r1, r2 +uxtab16 r0, r1, r2, ROR #8 +uxtah r0, r1, r2 +uxtah r0, r1, r2, ROR #8 +uxtb r0, r1 +uxtb.w r0, r1 +uxtb.w r0, r1, ROR #8 +uxtb16 r0, r1 +uxtb16 r0, r1, ROR #8 +uxth r0, r1 +uxth.w r0, r1 +uxth.w r0, r1, ROR #8 +wfe +wfi +yield + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 adc r0, r1, #0 +# CHECK-NEXT: 1 1 1.00 adcs r0, r1, #0 +# CHECK-NEXT: 1 1 1.00 U adcs r0, r1 +# CHECK-NEXT: 1 1 1.00 adc.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 adcs.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 adc.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 adcs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 1 0.50 add.w r0, sp, #1 +# CHECK-NEXT: 1 1 1.00 U add.w sp, sp, #1 +# CHECK-NEXT: 1 1 0.50 add.w r0, sp, #1 +# CHECK-NEXT: 1 1 0.50 adds.w r0, sp, #1 +# CHECK-NEXT: 1 1 1.00 addw r0, sp, #1 +# CHECK-NEXT: 1 1 1.00 U add r0, sp, r0 +# CHECK-NEXT: 1 1 1.00 U add sp, r1 +# CHECK-NEXT: 1 1 1.00 add.w r0, sp, r1 +# CHECK-NEXT: 1 1 1.00 adds.w r0, sp, r1 +# CHECK-NEXT: 1 2 1.00 add.w r0, sp, r1, lsl #1 +# CHECK-NEXT: 1 2 1.00 adds.w r0, sp, r1, lsl #1 +# CHECK-NEXT: 1 1 0.50 adds r0, r1, #1 +# CHECK-NEXT: 1 1 0.50 adds r0, #42 +# CHECK-NEXT: 1 1 0.50 add.w r0, r1, #1 +# CHECK-NEXT: 1 1 0.50 adds.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 addw r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 adds r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 add r0, r1 +# CHECK-NEXT: 1 1 1.00 add.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 adds.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 add.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 adds.w r0, r1, r2, lsl #1 +# 
CHECK-NEXT: 1 1 1.00 U adr.w r0, #-6 +# CHECK-NEXT: 1 1 1.00 U adr.w r8, #-6 +# CHECK-NEXT: 1 1 1.00 U adr.w r0, #-6 +# CHECK-NEXT: 1 1 1.00 and r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 ands r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 ands r1, r0 +# CHECK-NEXT: 1 1 1.00 and.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 ands.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 and.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 ands.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 1 1.00 asrs r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 asr.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 asrs.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 asrs r0, r1 +# CHECK-NEXT: 1 1 1.00 asr.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 asrs.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 asrl r0, r1, #1 +# CHECK-NEXT: 1 2 1.00 asrl r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 bfc r0, #1, #2 +# CHECK-NEXT: 1 1 1.00 bfi r0, r1, #1, #2 +# CHECK-NEXT: 1 1 1.00 bic r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 bics r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 bics r0, r1 +# CHECK-NEXT: 1 1 1.00 bic.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 bics.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 bic.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 bics.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 1 1.00 U bkpt #1 +# CHECK-NEXT: 1 2 1.00 * * U clrex +# CHECK-NEXT: 1 2 1.00 U clrm {r1, r2} +# CHECK-NEXT: 1 1 1.00 clz r0, r1 +# CHECK-NEXT: 1 1 1.00 cmn.w r0, #1 +# CHECK-NEXT: 1 1 1.00 cmn r0, r1 +# CHECK-NEXT: 1 1 1.00 cmn.w r0, r1 +# CHECK-NEXT: 1 2 1.00 cmn.w r0, r1, lsl #1 +# CHECK-NEXT: 1 1 1.00 cmp r0, #1 +# CHECK-NEXT: 1 1 1.00 cmp.w r0, #1 +# CHECK-NEXT: 1 1 1.00 cmp r0, r1 +# CHECK-NEXT: 1 1 1.00 U cmp r0, r10 +# CHECK-NEXT: 1 1 1.00 cmp.w r0, r1 +# CHECK-NEXT: 1 2 1.00 cmp.w r0, r1, lsl #1 +# CHECK-NEXT: 1 1 1.00 csel r1, r2, r3, eq +# CHECK-NEXT: 1 1 1.00 csinc r1, r2, r3, eq +# CHECK-NEXT: 1 1 1.00 csinv r1, r2, r3, eq +# CHECK-NEXT: 1 1 1.00 csneg r1, r2, r3, eq +# CHECK-NEXT: 1 2 1.00 * * U dmb sy +# CHECK-NEXT: 1 2 1.00 * * U dsb sy +# CHECK-NEXT: 1 1 1.00 eor r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 eors r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 
eors r0, r1 +# CHECK-NEXT: 1 1 1.00 eor.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 eors.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 eor.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 eors.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 * * U isb sy +# CHECK-NEXT: 1 2 1.00 * lda r0, [r1] +# CHECK-NEXT: 1 2 1.00 * ldab r0, [r1] +# CHECK-NEXT: 1 2 1.00 * * U ldaex r0, [r1] +# CHECK-NEXT: 1 2 1.00 * * U ldaexb r0, [r1] +# CHECK-NEXT: 1 2 1.00 * * U ldaexh r0, [r1] +# CHECK-NEXT: 1 2 1.00 * ldah r0, [r1] +# CHECK-NEXT: 1 2 1.00 * ldm r0!, {r1} +# CHECK-NEXT: 1 2 1.00 * ldm.w r0, {r1} +# CHECK-NEXT: 1 2 1.00 * ldm.w r0, {r1} +# CHECK-NEXT: 1 2 1.00 * ldr r1, [r0], #4 +# CHECK-NEXT: 1 2 1.00 * ldmdb r0, {r1} +# CHECK-NEXT: 1 2 1.00 * ldmdb r0!, {r1} +# CHECK-NEXT: 1 2 1.00 * ldr r0, [r1, #4] +# CHECK-NEXT: 1 2 1.00 * ldr r0, [sp, #4] +# CHECK-NEXT: 1 2 1.00 * ldr.w r0, [r1, #4] +# CHECK-NEXT: 1 2 1.00 * ldr r0, [r1, #-1] +# CHECK-NEXT: 1 2 1.00 * ldr r0, [r1], #1 +# CHECK-NEXT: 1 2 1.00 * ldr r0, [r1, #1]! +# CHECK-NEXT: 1 2 1.00 * ldr r0, [pc, #4] +# CHECK-NEXT: 1 2 1.00 * ldr.w r0, [pc, #4] +# CHECK-NEXT: 1 2 1.00 * ldr r0, next +# CHECK-NEXT: 1 2 1.00 * ldr.w r0, next +# CHECK-NEXT: 1 2 1.00 * ldr r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldr.w r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldr.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 2 1.00 * ldrb r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrb.w r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrb r0, [r1, #-1] +# CHECK-NEXT: 1 2 1.00 * ldrb r0, [r1], #1 +# CHECK-NEXT: 1 2 1.00 * ldrb r0, [r1, #1]! +# CHECK-NEXT: 1 2 1.00 * ldrb.w r0, [pc, #4] +# CHECK-NEXT: 1 2 1.00 * ldrb.w r0, next +# CHECK-NEXT: 1 2 1.00 * ldrb r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrb.w r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrb.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 2 1.00 U ldrbt r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrd r0, r2, [r1] +# CHECK-NEXT: 1 2 1.00 * ldrd r0, r2, [r1, #-4] +# CHECK-NEXT: 1 2 1.00 * ldrd r0, r2, [r1], #4 +# CHECK-NEXT: 1 2 1.00 * ldrd r0, r2, [r1, #4]! 
+# CHECK-NEXT: 1 2 1.00 * ldrd r0, r2, next +# CHECK-NEXT: 1 2 1.00 * * U ldrex r0, [r1] +# CHECK-NEXT: 1 2 1.00 * * U ldrex r0, [r1, #4] +# CHECK-NEXT: 1 2 1.00 * * U ldrexb r0, [r1] +# CHECK-NEXT: 1 2 1.00 * * U ldrexh r0, [r1] +# CHECK-NEXT: 1 2 1.00 * ldrh r0, [r1, #2] +# CHECK-NEXT: 1 2 1.00 * ldrh.w r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrh r0, [r1, #-1] +# CHECK-NEXT: 1 2 1.00 * ldrh r0, [r1], #1 +# CHECK-NEXT: 1 2 1.00 * ldrh r0, [r1, #1]! +# CHECK-NEXT: 1 2 1.00 * ldrh.w r0, [pc, #4] +# CHECK-NEXT: 1 2 1.00 * ldrh.w r0, next +# CHECK-NEXT: 1 2 1.00 * ldrh r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrh.w r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrh.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 2 1.00 U ldrht r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrsb.w r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrsb r0, [r1, #-1] +# CHECK-NEXT: 1 2 1.00 * ldrsb r0, [r1], #1 +# CHECK-NEXT: 1 2 1.00 * ldrsb r0, [r1, #1]! +# CHECK-NEXT: 1 2 1.00 * ldrsb.w r0, [pc, #4] +# CHECK-NEXT: 1 2 1.00 * ldrsb.w r0, next +# CHECK-NEXT: 1 2 1.00 * ldrsb r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrsb.w r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrsb.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 2 1.00 U ldrsbt r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * ldrsh.w r0, [r1, #2] +# CHECK-NEXT: 1 2 1.00 * ldrsh r0, [r1, #-1] +# CHECK-NEXT: 1 2 1.00 * ldrsh r0, [r1], #1 +# CHECK-NEXT: 1 2 1.00 * ldrsh r0, [r1, #1]! 
+# CHECK-NEXT: 1 2 1.00 * ldrsh.w r0, [pc, #4] +# CHECK-NEXT: 1 2 1.00 * ldrsh.w r0, next +# CHECK-NEXT: 1 2 1.00 * ldrsh r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrsh.w r0, [r1, r2] +# CHECK-NEXT: 1 2 1.00 * ldrsh.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 2 1.00 U ldrsht r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 U ldrt r0, [r1, #1] +# CHECK-NEXT: 1 1 1.00 lsls r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 lsl.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 lsls.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 lsls r0, r1 +# CHECK-NEXT: 1 1 1.00 lsl.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 lsls.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 lsll r0, r1, #2 +# CHECK-NEXT: 1 2 1.00 lsll r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 lsrs r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 lsr.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 lsrs.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 lsrs r0, r1 +# CHECK-NEXT: 1 1 1.00 lsr.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 lsrs.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 lsrl r0, r1, #2 +# CHECK-NEXT: 1 2 1.00 mla r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 mls r0, r1, r2, r3 +# CHECK-NEXT: 1 1 0.50 movs r0, #1 +# CHECK-NEXT: 1 1 0.50 mov.w r0, #1 +# CHECK-NEXT: 1 1 0.50 movs.w r0, #1 +# CHECK-NEXT: 1 1 0.50 movw r0, #1 +# CHECK-NEXT: 1 1 0.50 mov r0, r1 +# CHECK-NEXT: 1 1 0.50 mov.w r0, r1 +# CHECK-NEXT: 1 1 0.50 movs.w r0, r1 +# CHECK-NEXT: 1 1 1.00 movt r0, #1 +# CHECK-NEXT: 1 1 1.00 U mrs r0, apsr +# CHECK-NEXT: 1 1 1.00 U msr apsr_nzcvq, r0 +# CHECK-NEXT: 1 1 1.00 muls r1, r2, r1 +# CHECK-NEXT: 1 2 1.00 mul r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 mvn r0, #1 +# CHECK-NEXT: 1 1 1.00 mvns r0, #1 +# CHECK-NEXT: 1 1 1.00 mvns r0, r1 +# CHECK-NEXT: 1 1 1.00 mvn.w r0, r1 +# CHECK-NEXT: 1 1 1.00 mvns.w r0, r1 +# CHECK-NEXT: 1 1 1.00 mvn.w r0, r1, lsl #1 +# CHECK-NEXT: 1 1 1.00 mvns.w r0, r1, lsl #1 +# CHECK-NEXT: 1 1 1.00 * * U nop +# CHECK-NEXT: 1 1 1.00 orn r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 orns r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 orn r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 orns r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 orn r0, r1, r2, lsl #1 +# CHECK-NEXT: 
1 2 1.00 orns r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 1 1.00 orr r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 orrs r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 orrs r0, r1 +# CHECK-NEXT: 1 1 1.00 orr.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 orrs.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 orr.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 orrs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 pkhbt r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 pkhbt r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 pkhbt r0, r2, r1 +# CHECK-NEXT: 1 2 1.00 pkhtb r0, r1, r2, asr #1 +# CHECK-NEXT: 1 1 1.00 * U pop {r0} +# CHECK-NEXT: 1 2 1.00 * pop.w {r0, r1} +# CHECK-NEXT: 1 2 1.00 * ldr r0, [sp], #4 +# CHECK-NEXT: 1 2 1.00 * * U pssbb +# CHECK-NEXT: 1 1 1.00 * U push {r0} +# CHECK-NEXT: 1 1 1.00 * push.w {r0, r1} +# CHECK-NEXT: 1 1 1.00 * str r0, [sp, #-4]! +# CHECK-NEXT: 1 2 1.00 qadd r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qadd16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qadd8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qasx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qdadd r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qdsub r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qsax r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qsub r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qsub16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 qsub8 r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 rbit r0, r1 +# CHECK-NEXT: 1 1 1.00 rev r0, r1 +# CHECK-NEXT: 1 1 1.00 rev.w r0, r1 +# CHECK-NEXT: 1 1 1.00 rev16 r0, r1 +# CHECK-NEXT: 1 1 1.00 rev16.w r0, r1 +# CHECK-NEXT: 1 1 1.00 revsh r0, r1 +# CHECK-NEXT: 1 1 1.00 revsh.w r0, r1 +# CHECK-NEXT: 1 1 1.00 ror.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 rors.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 rors r0, r1 +# CHECK-NEXT: 1 1 1.00 ror.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 rors.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 rrx r0, r1 +# CHECK-NEXT: 1 1 1.00 rrxs r0, r1 +# CHECK-NEXT: 1 1 1.00 rsbs r0, r1, #0 +# CHECK-NEXT: 1 1 1.00 rsb.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 rsbs.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 U rsb r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 U rsbs r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 rsb r0, r1, r2, lsl #1 +# 
CHECK-NEXT: 1 2 1.00 rsbs r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 * * U sadd16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U sadd8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U sasx r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 sbc r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 sbcs r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 U sbcs r0, r1 +# CHECK-NEXT: 1 1 1.00 sbc.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 sbcs.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 sbc.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 sbcs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 sbfx r0, r1, #1, #2 +# CHECK-NEXT: 1 2 1.00 sdiv r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 * sel r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 * * U sev +# CHECK-NEXT: 1 2 1.00 shadd16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 shadd8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 shasx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 shsax r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 shsub16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 shsub8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smlabb r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlabt r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlatb r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlatt r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlad r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smladx r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlal r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlalbb r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlalbt r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlaltb r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlaltt r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlald r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlaldx r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlawb r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlawt r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlsd r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlsdx r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlsld r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smlsldx r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smmla r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smmlar r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 U smmls r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smmlsr r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smmul r0, r1, r2 
+# CHECK-NEXT: 1 2 1.00 smmulr r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smuad r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smuadx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smulbb r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smulbt r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smultb r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smultt r0, r1, r2 +# CHECK-NEXT: 2 2 1.00 smull r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 smulwb r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smulwt r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smusd r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 smusdx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 sqrshr r0, r1 +# CHECK-NEXT: 1 2 1.00 sqrshrl r0, r1, #48, r2 +# CHECK-NEXT: 1 2 1.00 sqshl r0, #7 +# CHECK-NEXT: 1 2 1.00 sqshll r0, r1, #7 +# CHECK-NEXT: 1 2 1.00 srshr r0, #7 +# CHECK-NEXT: 1 2 1.00 srshrl r0, r1, #7 +# CHECK-NEXT: 1 2 1.00 ssat r0, #1, r2 +# CHECK-NEXT: 1 2 1.00 ssat r0, #1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 ssat16 r0, #1, r1 +# CHECK-NEXT: 1 2 1.00 * * U ssax r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U ssbb +# CHECK-NEXT: 1 2 1.00 * * U ssub16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U ssub8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * stl r0, [r1] +# CHECK-NEXT: 1 2 1.00 * stlb r0, [r1] +# CHECK-NEXT: 1 2 1.00 * * U stlex r0, r1, [r2] +# CHECK-NEXT: 1 2 1.00 * * U stlexb r0, r1, [r2] +# CHECK-NEXT: 1 2 1.00 * * U stlexh r0, r1, [r2] +# CHECK-NEXT: 1 2 1.00 * stlh r0, [r1] +# CHECK-NEXT: 1 1 1.00 * stm r0!, {r1} +# CHECK-NEXT: 1 1 1.00 * stm.w r0, {r1} +# CHECK-NEXT: 1 1 1.00 * stm.w r0!, {r1} +# CHECK-NEXT: 1 1 1.00 * stmdb r0, {r1} +# CHECK-NEXT: 1 1 1.00 * str r1, [r0, #-4]! 
+# CHECK-NEXT: 1 1 1.00 * str r0, [r1] +# CHECK-NEXT: 1 1 1.00 * str r0, [r1, #4] +# CHECK-NEXT: 1 1 1.00 * str r0, [sp, #4] +# CHECK-NEXT: 1 1 1.00 * str.w r0, [r1, #1] +# CHECK-NEXT: 1 1 1.00 * str r0, [r1, #-1] +# CHECK-NEXT: 1 1 1.00 * str r0, [r1], #1 +# CHECK-NEXT: 1 1 1.00 * str r0, [r1, r2] +# CHECK-NEXT: 1 1 1.00 * str.w r0, [r1, r2] +# CHECK-NEXT: 1 1 1.00 * str.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 1 1.00 * strb r0, [r1] +# CHECK-NEXT: 1 1 1.00 * strb r0, [r1, #1] +# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, #1] +# CHECK-NEXT: 1 1 1.00 * strb r0, [r1, #-1] +# CHECK-NEXT: 1 1 1.00 * strb r0, [r1], #1 +# CHECK-NEXT: 1 1 1.00 * strb r0, [r1, #1]! +# CHECK-NEXT: 1 1 1.00 * strb r0, [r1, r2] +# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, r2] +# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 1 1.00 U strbt r0, [r1, #1] +# CHECK-NEXT: 1 2 1.00 * strd r0, r1, [r2, #4] +# CHECK-NEXT: 1 2 1.00 * strd r0, r1, [r2], #4 +# CHECK-NEXT: 1 2 1.00 * strd r0, r1, [r2, #4]! +# CHECK-NEXT: 1 1 1.00 * * U strex r0, r1, [r2] +# CHECK-NEXT: 1 1 1.00 * * U strex r0, r1, [r2, #4] +# CHECK-NEXT: 1 1 1.00 * * U strexb r0, r1, [r2] +# CHECK-NEXT: 1 1 1.00 * * U strexh r0, r1, [r2] +# CHECK-NEXT: 1 1 1.00 * strh r0, [r1] +# CHECK-NEXT: 1 1 1.00 * strh r0, [r1, #2] +# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, #2] +# CHECK-NEXT: 1 1 1.00 * strh r0, [r1, #-1] +# CHECK-NEXT: 1 1 1.00 * strh r0, [r1], #1 +# CHECK-NEXT: 1 1 1.00 * strh r0, [r1, #1]! 
+# CHECK-NEXT: 1 1 1.00 * strh r0, [r1, r2] +# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, r2] +# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1 1 1.00 U strht r0, [r1, #1] +# CHECK-NEXT: 1 1 1.00 U strt r0, [r1, #1] +# CHECK-NEXT: 1 1 1.00 U sub sp, #4 +# CHECK-NEXT: 1 1 0.50 sub.w r0, sp, #1 +# CHECK-NEXT: 1 1 0.50 subs.w r0, sp, #1 +# CHECK-NEXT: 1 1 1.00 subw r0, sp, #1 +# CHECK-NEXT: 1 1 1.00 sub.w r0, sp, r1 +# CHECK-NEXT: 1 1 1.00 subs.w r0, sp, r1 +# CHECK-NEXT: 1 2 1.00 sub.w r0, sp, r1, lsl #1 +# CHECK-NEXT: 1 2 1.00 subs.w r0, sp, r1, lsl #1 +# CHECK-NEXT: 1 1 0.50 subs r0, r1, #1 +# CHECK-NEXT: 1 1 0.50 subs r0, #1 +# CHECK-NEXT: 1 1 0.50 sub.w r0, r1, #1 +# CHECK-NEXT: 1 1 0.50 subs.w r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 subw r0, r1, #1 +# CHECK-NEXT: 1 1 1.00 subs r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 sub.w r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 subs.w r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 sub.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 2 1.00 subs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1 1 1.00 sxtab r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 sxtab r0, r1, r2, ror #8 +# CHECK-NEXT: 1 1 1.00 sxtab16 r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 sxtab16 r0, r1, r2, ror #8 +# CHECK-NEXT: 1 1 1.00 sxtah r0, r1, r2 +# CHECK-NEXT: 1 1 1.00 sxtah r0, r1, r2, ror #8 +# CHECK-NEXT: 1 1 0.50 sxtb r0, r1 +# CHECK-NEXT: 1 1 0.50 sxtb.w r0, r1 +# CHECK-NEXT: 1 1 0.50 sxtb.w r0, r1, ror #8 +# CHECK-NEXT: 1 1 1.00 sxtb16 r0, r1 +# CHECK-NEXT: 1 1 1.00 sxtb16 r0, r1, ror #8 +# CHECK-NEXT: 1 1 0.50 sxth r0, r1 +# CHECK-NEXT: 1 1 0.50 sxth.w r0, r1 +# CHECK-NEXT: 1 1 0.50 sxth.w r0, r1, ror #8 +# CHECK-NEXT: 1 1 1.00 U tbb [r0, r1] +# CHECK-NEXT: 1 1 1.00 U tbh [r0, r1, lsl #1] +# CHECK-NEXT: 1 1 1.00 teq.w r0, #1 +# CHECK-NEXT: 1 1 1.00 teq.w r0, r1 +# CHECK-NEXT: 1 2 1.00 teq.w r0, r1, lsl #1 +# CHECK-NEXT: 1 1 1.00 tst.w r0, #1 +# CHECK-NEXT: 1 1 1.00 tst r0, r1 +# CHECK-NEXT: 1 1 1.00 tst.w r0, r1 +# CHECK-NEXT: 1 2 1.00 tst.w r0, r1, lsl #1 +# CHECK-NEXT: 1 2 1.00 * * U uadd16 r0, r1, 
r2 +# CHECK-NEXT: 1 2 1.00 * * U uadd8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U uasx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 ubfx r0, r1, #1, #2 +# CHECK-NEXT: 1 2 1.00 udiv r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uhadd16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uhadd8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uhasx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uhsax r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uhsub16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uhsub8 r0, r1, r2 +# CHECK-NEXT: 2 2 1.00 umaal r0, r1, r2, r3 +# CHECK-NEXT: 2 2 1.00 umlal r0, r1, r2, r3 +# CHECK-NEXT: 2 2 1.00 umull r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 uqadd16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uqadd8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uqasx r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uqrshl r0, r1 +# CHECK-NEXT: 1 2 1.00 uqrshll r0, r1, #48, r2 +# CHECK-NEXT: 1 2 1.00 uqsax r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uqshl r0, #1 +# CHECK-NEXT: 1 2 1.00 uqshll r0, r1, #1 +# CHECK-NEXT: 1 2 1.00 uqsub16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uqsub8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 urshr r0, #1 +# CHECK-NEXT: 1 2 1.00 urshrl r0, r1, #1 +# CHECK-NEXT: 1 2 1.00 usad8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 usada8 r0, r1, r2, r3 +# CHECK-NEXT: 1 2 1.00 usat r0, #1, r1 +# CHECK-NEXT: 1 2 1.00 usat r0, #1, r1, lsl #1 +# CHECK-NEXT: 1 2 1.00 usat16 r0, #1, r1 +# CHECK-NEXT: 1 2 1.00 * * U usax r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U usub16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 * * U usub8 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uxtab r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uxtab r0, r1, r2, ror #8 +# CHECK-NEXT: 1 2 1.00 uxtab16 r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uxtab16 r0, r1, r2, ror #8 +# CHECK-NEXT: 1 2 1.00 uxtah r0, r1, r2 +# CHECK-NEXT: 1 2 1.00 uxtah r0, r1, r2, ror #8 +# CHECK-NEXT: 1 1 0.50 uxtb r0, r1 +# CHECK-NEXT: 1 1 0.50 uxtb.w r0, r1 +# CHECK-NEXT: 1 1 0.50 uxtb.w r0, r1, ror #8 +# CHECK-NEXT: 1 1 1.00 uxtb16 r0, r1 +# CHECK-NEXT: 1 1 1.00 uxtb16 r0, r1, ror #8 +# CHECK-NEXT: 1 1 0.50 uxth r0, r1 +# CHECK-NEXT: 1 1 0.50 uxth.w r0, r1 +# CHECK-NEXT: 1 1 0.50 
uxth.w r0, r1, ror #8 +# CHECK-NEXT: 1 1 1.00 * * U wfe +# CHECK-NEXT: 1 1 1.00 * * U wfi +# CHECK-NEXT: 1 1 1.00 * * U yield + +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: 430.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: 1.00 - - - - adc r0, r1, #0 +# CHECK-NEXT: 1.00 - - - - adcs r0, r1, #0 +# CHECK-NEXT: 1.00 - - - - adcs r0, r1 +# CHECK-NEXT: 1.00 - - - - adc.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - adcs.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - adc.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - adcs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: - - - - - add.w r0, sp, #1 +# CHECK-NEXT: 1.00 - - - - add.w sp, sp, #1 +# CHECK-NEXT: - - - - - add.w r0, sp, #1 +# CHECK-NEXT: - - - - - adds.w r0, sp, #1 +# CHECK-NEXT: 1.00 - - - - addw r0, sp, #1 +# CHECK-NEXT: 1.00 - - - - add r0, sp, r0 +# CHECK-NEXT: 1.00 - - - - add sp, r1 +# CHECK-NEXT: 1.00 - - - - add.w r0, sp, r1 +# CHECK-NEXT: 1.00 - - - - adds.w r0, sp, r1 +# CHECK-NEXT: 1.00 - - - - add.w r0, sp, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - adds.w r0, sp, r1, lsl #1 +# CHECK-NEXT: - - - - - adds r0, r1, #1 +# CHECK-NEXT: - - - - - adds r0, #42 +# CHECK-NEXT: - - - - - add.w r0, r1, #1 +# CHECK-NEXT: - - - - - adds.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - addw r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - adds r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - add r0, r1 +# CHECK-NEXT: 1.00 - - - - add.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - adds.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - add.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - adds.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - adr.w r0, #-6 +# CHECK-NEXT: 1.00 - - - - adr.w r8, #-6 +# CHECK-NEXT: 1.00 - - - - adr.w r0, #-6 +# CHECK-NEXT: 1.00 - - - - and 
r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - ands r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - ands r1, r0 +# CHECK-NEXT: 1.00 - - - - and.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - ands.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - and.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - ands.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - asrs r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - asr.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - asrs.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - asrs r0, r1 +# CHECK-NEXT: 1.00 - - - - asr.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - asrs.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - asrl r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - asrl r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - bfc r0, #1, #2 +# CHECK-NEXT: 1.00 - - - - bfi r0, r1, #1, #2 +# CHECK-NEXT: 1.00 - - - - bic r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - bics r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - bics r0, r1 +# CHECK-NEXT: 1.00 - - - - bic.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - bics.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - bic.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - bics.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - bkpt #1 +# CHECK-NEXT: 1.00 - - - - clrex +# CHECK-NEXT: 1.00 - - - - clrm {r1, r2} +# CHECK-NEXT: 1.00 - - - - clz r0, r1 +# CHECK-NEXT: 1.00 - - - - cmn.w r0, #1 +# CHECK-NEXT: 1.00 - - - - cmn r0, r1 +# CHECK-NEXT: 1.00 - - - - cmn.w r0, r1 +# CHECK-NEXT: 1.00 - - - - cmn.w r0, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - cmp r0, #1 +# CHECK-NEXT: 1.00 - - - - cmp.w r0, #1 +# CHECK-NEXT: 1.00 - - - - cmp r0, r1 +# CHECK-NEXT: 1.00 - - - - cmp r0, r10 +# CHECK-NEXT: 1.00 - - - - cmp.w r0, r1 +# CHECK-NEXT: 1.00 - - - - cmp.w r0, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - csel r1, r2, r3, eq +# CHECK-NEXT: 1.00 - - - - csinc r1, r2, r3, eq +# CHECK-NEXT: 1.00 - - - - csinv r1, r2, r3, eq +# CHECK-NEXT: 1.00 - - - - csneg r1, r2, r3, eq +# CHECK-NEXT: 1.00 - - - - dmb sy +# CHECK-NEXT: 1.00 - - - - dsb sy +# CHECK-NEXT: 1.00 - - - - eor r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - eors r0, r1, #1 +# CHECK-NEXT: 
1.00 - - - - eors r0, r1 +# CHECK-NEXT: 1.00 - - - - eor.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - eors.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - eor.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - eors.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - isb sy +# CHECK-NEXT: 1.00 - - - - lda r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldab r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldaex r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldaexb r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldaexh r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldah r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldm r0!, {r1} +# CHECK-NEXT: 1.00 - - - - ldm.w r0, {r1} +# CHECK-NEXT: 1.00 - - - - ldm.w r0, {r1} +# CHECK-NEXT: 1.00 - - - - ldr r1, [r0], #4 +# CHECK-NEXT: 1.00 - - - - ldmdb r0, {r1} +# CHECK-NEXT: 1.00 - - - - ldmdb r0!, {r1} +# CHECK-NEXT: 1.00 - - - - ldr r0, [r1, #4] +# CHECK-NEXT: 1.00 - - - - ldr r0, [sp, #4] +# CHECK-NEXT: 1.00 - - - - ldr.w r0, [r1, #4] +# CHECK-NEXT: 1.00 - - - - ldr r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - ldr r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - ldr r0, [r1, #1]! +# CHECK-NEXT: 1.00 - - - - ldr r0, [pc, #4] +# CHECK-NEXT: 1.00 - - - - ldr.w r0, [pc, #4] +# CHECK-NEXT: 1.00 - - - - ldr r0, next +# CHECK-NEXT: 1.00 - - - - ldr.w r0, next +# CHECK-NEXT: 1.00 - - - - ldr r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldr.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldr.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - ldrb r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrb.w r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrb r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - ldrb r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - ldrb r0, [r1, #1]! 
+# CHECK-NEXT: 1.00 - - - - ldrb.w r0, [pc, #4] +# CHECK-NEXT: 1.00 - - - - ldrb.w r0, next +# CHECK-NEXT: 1.00 - - - - ldrb r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrb.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrb.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - ldrbt r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrd r0, r2, [r1] +# CHECK-NEXT: 1.00 - - - - ldrd r0, r2, [r1, #-4] +# CHECK-NEXT: 1.00 - - - - ldrd r0, r2, [r1], #4 +# CHECK-NEXT: 1.00 - - - - ldrd r0, r2, [r1, #4]! +# CHECK-NEXT: 1.00 - - - - ldrd r0, r2, next +# CHECK-NEXT: 1.00 - - - - ldrex r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldrex r0, [r1, #4] +# CHECK-NEXT: 1.00 - - - - ldrexb r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldrexh r0, [r1] +# CHECK-NEXT: 1.00 - - - - ldrh r0, [r1, #2] +# CHECK-NEXT: 1.00 - - - - ldrh.w r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrh r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - ldrh r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - ldrh r0, [r1, #1]! +# CHECK-NEXT: 1.00 - - - - ldrh.w r0, [pc, #4] +# CHECK-NEXT: 1.00 - - - - ldrh.w r0, next +# CHECK-NEXT: 1.00 - - - - ldrh r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrh.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrh.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - ldrht r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrsb.w r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrsb r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - ldrsb r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - ldrsb r0, [r1, #1]! +# CHECK-NEXT: 1.00 - - - - ldrsb.w r0, [pc, #4] +# CHECK-NEXT: 1.00 - - - - ldrsb.w r0, next +# CHECK-NEXT: 1.00 - - - - ldrsb r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrsb.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrsb.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - ldrsbt r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrsh.w r0, [r1, #2] +# CHECK-NEXT: 1.00 - - - - ldrsh r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - ldrsh r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - ldrsh r0, [r1, #1]! 
+# CHECK-NEXT: 1.00 - - - - ldrsh.w r0, [pc, #4] +# CHECK-NEXT: 1.00 - - - - ldrsh.w r0, next +# CHECK-NEXT: 1.00 - - - - ldrsh r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrsh.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - ldrsh.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - ldrsht r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - ldrt r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - lsls r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - lsl.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - lsls.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - lsls r0, r1 +# CHECK-NEXT: 1.00 - - - - lsl.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - lsls.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - lsll r0, r1, #2 +# CHECK-NEXT: 1.00 - - - - lsll r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - lsrs r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - lsr.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - lsrs.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - lsrs r0, r1 +# CHECK-NEXT: 1.00 - - - - lsr.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - lsrs.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - lsrl r0, r1, #2 +# CHECK-NEXT: 1.00 - - - - mla r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - mls r0, r1, r2, r3 +# CHECK-NEXT: - - - - - movs r0, #1 +# CHECK-NEXT: - - - - - mov.w r0, #1 +# CHECK-NEXT: - - - - - movs.w r0, #1 +# CHECK-NEXT: - - - - - movw r0, #1 +# CHECK-NEXT: - - - - - mov r0, r1 +# CHECK-NEXT: - - - - - mov.w r0, r1 +# CHECK-NEXT: - - - - - movs.w r0, r1 +# CHECK-NEXT: 1.00 - - - - movt r0, #1 +# CHECK-NEXT: 1.00 - - - - mrs r0, apsr +# CHECK-NEXT: 1.00 - - - - msr apsr_nzcvq, r0 +# CHECK-NEXT: 1.00 - - - - muls r1, r2, r1 +# CHECK-NEXT: 1.00 - - - - mul r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - mvn r0, #1 +# CHECK-NEXT: 1.00 - - - - mvns r0, #1 +# CHECK-NEXT: 1.00 - - - - mvns r0, r1 +# CHECK-NEXT: 1.00 - - - - mvn.w r0, r1 +# CHECK-NEXT: 1.00 - - - - mvns.w r0, r1 +# CHECK-NEXT: 1.00 - - - - mvn.w r0, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - mvns.w r0, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - nop +# CHECK-NEXT: 1.00 - - - - orn r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - orns r0, r1, #1 
+# CHECK-NEXT: 1.00 - - - - orn r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - orns r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - orn r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - orns r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - orr r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - orrs r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - orrs r0, r1 +# CHECK-NEXT: 1.00 - - - - orr.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - orrs.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - orr.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - orrs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - pkhbt r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - pkhbt r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - pkhbt r0, r2, r1 +# CHECK-NEXT: 1.00 - - - - pkhtb r0, r1, r2, asr #1 +# CHECK-NEXT: 1.00 - - - - pop {r0} +# CHECK-NEXT: 1.00 - - - - pop.w {r0, r1} +# CHECK-NEXT: 1.00 - - - - ldr r0, [sp], #4 +# CHECK-NEXT: 1.00 - - - - pssbb +# CHECK-NEXT: 1.00 - - - - push {r0} +# CHECK-NEXT: 1.00 - - - - push.w {r0, r1} +# CHECK-NEXT: 1.00 - - - - str r0, [sp, #-4]! +# CHECK-NEXT: 1.00 - - - - qadd r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qadd16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qadd8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qasx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qdadd r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qdsub r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qsax r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qsub r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qsub16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - qsub8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - rbit r0, r1 +# CHECK-NEXT: 1.00 - - - - rev r0, r1 +# CHECK-NEXT: 1.00 - - - - rev.w r0, r1 +# CHECK-NEXT: 1.00 - - - - rev16 r0, r1 +# CHECK-NEXT: 1.00 - - - - rev16.w r0, r1 +# CHECK-NEXT: 1.00 - - - - revsh r0, r1 +# CHECK-NEXT: 1.00 - - - - revsh.w r0, r1 +# CHECK-NEXT: 1.00 - - - - ror.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - rors.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - rors r0, r1 +# CHECK-NEXT: 1.00 - - - - ror.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - rors.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - rrx r0, r1 +# 
CHECK-NEXT: 1.00 - - - - rrxs r0, r1 +# CHECK-NEXT: 1.00 - - - - rsbs r0, r1, #0 +# CHECK-NEXT: 1.00 - - - - rsb.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - rsbs.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - rsb r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - rsbs r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - rsb r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - rsbs r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - sadd16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sadd8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sasx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sbc r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - sbcs r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - sbcs r0, r1 +# CHECK-NEXT: 1.00 - - - - sbc.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sbcs.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sbc.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - sbcs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - sbfx r0, r1, #1, #2 +# CHECK-NEXT: 1.00 - - - - sdiv r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sel r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sev +# CHECK-NEXT: 1.00 - - - - shadd16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - shadd8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - shasx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - shsax r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - shsub16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - shsub8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smlabb r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlabt r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlatb r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlatt r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlad r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smladx r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlal r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlalbb r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlalbt r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlaltb r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlaltt r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlald r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlaldx r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlawb r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - 
smlawt r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlsd r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlsdx r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlsld r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smlsldx r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smmla r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smmlar r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smmls r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smmlsr r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smmul r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smmulr r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smuad r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smuadx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smulbb r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smulbt r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smultb r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smultt r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smull r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - smulwb r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smulwt r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smusd r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - smusdx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sqrshr r0, r1 +# CHECK-NEXT: 1.00 - - - - sqrshrl r0, r1, #48, r2 +# CHECK-NEXT: 1.00 - - - - sqshl r0, #7 +# CHECK-NEXT: 1.00 - - - - sqshll r0, r1, #7 +# CHECK-NEXT: 1.00 - - - - srshr r0, #7 +# CHECK-NEXT: 1.00 - - - - srshrl r0, r1, #7 +# CHECK-NEXT: 1.00 - - - - ssat r0, #1, r2 +# CHECK-NEXT: 1.00 - - - - ssat r0, #1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - ssat16 r0, #1, r1 +# CHECK-NEXT: 1.00 - - - - ssax r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - ssbb +# CHECK-NEXT: 1.00 - - - - ssub16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - ssub8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - stl r0, [r1] +# CHECK-NEXT: 1.00 - - - - stlb r0, [r1] +# CHECK-NEXT: 1.00 - - - - stlex r0, r1, [r2] +# CHECK-NEXT: 1.00 - - - - stlexb r0, r1, [r2] +# CHECK-NEXT: 1.00 - - - - stlexh r0, r1, [r2] +# CHECK-NEXT: 1.00 - - - - stlh r0, [r1] +# CHECK-NEXT: 1.00 - - - - stm r0!, {r1} +# CHECK-NEXT: 1.00 - - - - stm.w r0, {r1} +# CHECK-NEXT: 1.00 - - - - stm.w r0!, {r1} +# 
CHECK-NEXT: 1.00 - - - - stmdb r0, {r1} +# CHECK-NEXT: 1.00 - - - - str r1, [r0, #-4]! +# CHECK-NEXT: 1.00 - - - - str r0, [r1] +# CHECK-NEXT: 1.00 - - - - str r0, [r1, #4] +# CHECK-NEXT: 1.00 - - - - str r0, [sp, #4] +# CHECK-NEXT: 1.00 - - - - str.w r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - str r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - str r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - str r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - str.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - str.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - strb r0, [r1] +# CHECK-NEXT: 1.00 - - - - strb r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - strb.w r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - strb r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - strb r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - strb r0, [r1, #1]! +# CHECK-NEXT: 1.00 - - - - strb r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - strb.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - strb.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - strbt r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - strd r0, r1, [r2, #4] +# CHECK-NEXT: 1.00 - - - - strd r0, r1, [r2], #4 +# CHECK-NEXT: 1.00 - - - - strd r0, r1, [r2, #4]! +# CHECK-NEXT: 1.00 - - - - strex r0, r1, [r2] +# CHECK-NEXT: 1.00 - - - - strex r0, r1, [r2, #4] +# CHECK-NEXT: 1.00 - - - - strexb r0, r1, [r2] +# CHECK-NEXT: 1.00 - - - - strexh r0, r1, [r2] +# CHECK-NEXT: 1.00 - - - - strh r0, [r1] +# CHECK-NEXT: 1.00 - - - - strh r0, [r1, #2] +# CHECK-NEXT: 1.00 - - - - strh.w r0, [r1, #2] +# CHECK-NEXT: 1.00 - - - - strh r0, [r1, #-1] +# CHECK-NEXT: 1.00 - - - - strh r0, [r1], #1 +# CHECK-NEXT: 1.00 - - - - strh r0, [r1, #1]! 
+# CHECK-NEXT: 1.00 - - - - strh r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - strh.w r0, [r1, r2] +# CHECK-NEXT: 1.00 - - - - strh.w r0, [r1, r2, lsl #1] +# CHECK-NEXT: 1.00 - - - - strht r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - strt r0, [r1, #1] +# CHECK-NEXT: 1.00 - - - - sub sp, #4 +# CHECK-NEXT: - - - - - sub.w r0, sp, #1 +# CHECK-NEXT: - - - - - subs.w r0, sp, #1 +# CHECK-NEXT: 1.00 - - - - subw r0, sp, #1 +# CHECK-NEXT: 1.00 - - - - sub.w r0, sp, r1 +# CHECK-NEXT: 1.00 - - - - subs.w r0, sp, r1 +# CHECK-NEXT: 1.00 - - - - sub.w r0, sp, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - subs.w r0, sp, r1, lsl #1 +# CHECK-NEXT: - - - - - subs r0, r1, #1 +# CHECK-NEXT: - - - - - subs r0, #1 +# CHECK-NEXT: - - - - - sub.w r0, r1, #1 +# CHECK-NEXT: - - - - - subs.w r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - subw r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - subs r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sub.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - subs.w r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sub.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - subs.w r0, r1, r2, lsl #1 +# CHECK-NEXT: 1.00 - - - - sxtab r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sxtab r0, r1, r2, ror #8 +# CHECK-NEXT: 1.00 - - - - sxtab16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sxtab16 r0, r1, r2, ror #8 +# CHECK-NEXT: 1.00 - - - - sxtah r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - sxtah r0, r1, r2, ror #8 +# CHECK-NEXT: - - - - - sxtb r0, r1 +# CHECK-NEXT: - - - - - sxtb.w r0, r1 +# CHECK-NEXT: - - - - - sxtb.w r0, r1, ror #8 +# CHECK-NEXT: 1.00 - - - - sxtb16 r0, r1 +# CHECK-NEXT: 1.00 - - - - sxtb16 r0, r1, ror #8 +# CHECK-NEXT: - - - - - sxth r0, r1 +# CHECK-NEXT: - - - - - sxth.w r0, r1 +# CHECK-NEXT: - - - - - sxth.w r0, r1, ror #8 +# CHECK-NEXT: 1.00 - - - - tbb [r0, r1] +# CHECK-NEXT: 1.00 - - - - tbh [r0, r1, lsl #1] +# CHECK-NEXT: 1.00 - - - - teq.w r0, #1 +# CHECK-NEXT: 1.00 - - - - teq.w r0, r1 +# CHECK-NEXT: 1.00 - - - - teq.w r0, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - tst.w r0, #1 +# CHECK-NEXT: 1.00 - - - - tst r0, r1 +# 
CHECK-NEXT: 1.00 - - - - tst.w r0, r1 +# CHECK-NEXT: 1.00 - - - - tst.w r0, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - uadd16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uadd8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uasx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - ubfx r0, r1, #1, #2 +# CHECK-NEXT: 1.00 - - - - udiv r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uhadd16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uhadd8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uhasx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uhsax r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uhsub16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uhsub8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - umaal r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - umlal r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - umull r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - uqadd16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uqadd8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uqasx r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uqrshl r0, r1 +# CHECK-NEXT: 1.00 - - - - uqrshll r0, r1, #48, r2 +# CHECK-NEXT: 1.00 - - - - uqsax r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uqshl r0, #1 +# CHECK-NEXT: 1.00 - - - - uqshll r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - uqsub16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uqsub8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - urshr r0, #1 +# CHECK-NEXT: 1.00 - - - - urshrl r0, r1, #1 +# CHECK-NEXT: 1.00 - - - - usad8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - usada8 r0, r1, r2, r3 +# CHECK-NEXT: 1.00 - - - - usat r0, #1, r1 +# CHECK-NEXT: 1.00 - - - - usat r0, #1, r1, lsl #1 +# CHECK-NEXT: 1.00 - - - - usat16 r0, #1, r1 +# CHECK-NEXT: 1.00 - - - - usax r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - usub16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - usub8 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uxtab r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uxtab r0, r1, r2, ror #8 +# CHECK-NEXT: 1.00 - - - - uxtab16 r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uxtab16 r0, r1, r2, ror #8 +# CHECK-NEXT: 1.00 - - - - uxtah r0, r1, r2 +# CHECK-NEXT: 1.00 - - - - uxtah r0, r1, r2, ror #8 +# CHECK-NEXT: - - - - - uxtb r0, r1 +# 
CHECK-NEXT: - - - - - uxtb.w r0, r1 +# CHECK-NEXT: - - - - - uxtb.w r0, r1, ror #8 +# CHECK-NEXT: 1.00 - - - - uxtb16 r0, r1 +# CHECK-NEXT: 1.00 - - - - uxtb16 r0, r1, ror #8 +# CHECK-NEXT: - - - - - uxth r0, r1 +# CHECK-NEXT: - - - - - uxth.w r0, r1 +# CHECK-NEXT: - - - - - uxth.w r0, r1, ror #8 +# CHECK-NEXT: 1.00 - - - - wfe +# CHECK-NEXT: 1.00 - - - - wfi +# CHECK-NEXT: 1.00 - - - - yield diff --git a/llvm/test/tools/llvm-mca/ARM/m55-mve-fp.s b/llvm/test/tools/llvm-mca/ARM/m55-mve-fp.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-mve-fp.s @@ -0,0 +1,315 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -instruction-tables < %s | FileCheck %s + +vabd.f16 q0, q2, q1 +vabd.f32 q0, q2, q1 +vabs.f16 q0, q2 +vabs.f32 q0, q2 +vadd.f16 q0, q2, q1 +vadd.f32 q0, q2, q1 +vadd.f16 q0, q2, r0 +vadd.f32 q0, q2, r0 +vcadd.f16 q0, q2, q1, #90 +vcadd.f32 q0, q2, q1, #90 +vcmla.f16 q0, q2, q1, #90 +vcmla.f32 q0, q2, q1, #90 +vcmul.f16 q0, q2, q1, #90 +vcmul.f32 q0, q2, q1, #90 +vcvt.f16.s16 q0, q1, #4 +vcvt.f16.u16 q0, q1, #4 +vcvt.s16.f16 q0, q1, #4 +vcvt.u16.f16 q0, q1, #4 +vcvt.f32.s32 q0, q1, #4 +vcvt.f32.u32 q0, q1, #4 +vcvt.s32.f32 q0, q1, #4 +vcvt.u32.f32 q0, q1, #4 +vcvt.f16.s16 q0, q1 +vcvt.f32.s32 q0, q1 +vcvt.f16.u16 q0, q1 +vcvt.f32.u32 q0, q1 +vcvt.s16.f16 q0, q1 +vcvt.s32.f32 q0, q1 +vcvt.u16.f16 q0, q1 +vcvt.u32.f32 q0, q1 +vcvtb.f16.f32 q0, q1 +vcvtb.f32.f16 q0, q1 +vcvtt.f16.f32 q0, q1 +vcvtt.f32.f16 q0, q1 +vcvta.s16.f16 q0, q1 +vcvta.s32.f32 q0, q1 +vcvta.u16.f16 q0, q1 +vcvta.u32.f32 q0, q1 +vcvtm.s16.f16 q0, q1 +vcvtm.s32.f32 q0, q1 +vcvtm.u16.f16 q0, q1 +vcvtm.u32.f32 q0, q1 +vcvtn.s16.f16 q0, q1 +vcvtn.s32.f32 q0, q1 +vcvtn.u16.f16 q0, q1 +vcvtn.u32.f32 q0, q1 +vcvtp.s16.f16 q0, q1 +vcvtp.s32.f32 q0, q1 +vcvtp.u16.f16 q0, q1 +vcvtp.u32.f32 q0, q1 +vfma.f16 q0, q2, r0 +vfma.f32 q0, q2, r0 +vfma.f16 q0, q2, q1 +vfma.f32 q0, 
q2, q1 +vfms.f16 q0, q2, q1 +vfms.f32 q0, q2, q1 +vfmas.f16 q0, q2, r0 +vfmas.f32 q0, q2, r0 +vmaxnm.f16 q0, q2, q1 +vmaxnm.f32 q0, q2, q1 +vmaxnma.f16 q0, q2 +vmaxnma.f32 q0, q2 +vmaxnmv.f16 r0, q2 +vmaxnmv.f32 r0, q2 +vmaxnmav.f16 r0, q2 +vmaxnmav.f32 r0, q2 +vminnm.f16 q0, q2, q1 +vminnm.f32 q0, q2, q1 +vminnma.f16 q0, q2 +vminnma.f32 q0, q2 +vminnmv.f16 r0, q2 +vminnmv.f32 r0, q2 +vminnmav.f16 r0, q2 +vminnmav.f32 r0, q2 +vmul.f16 q0, q2, q1 +vmul.f32 q0, q2, q1 +vmul.f16 q0, q2, r0 +vmul.f32 q0, q2, r0 +vneg.f16 q0, q2 +vneg.f32 q0, q2 +vrinta.f16 q0, q2 +vrinta.f32 q0, q2 +vrintm.f16 q0, q2 +vrintm.f32 q0, q2 +vrintn.f16 q0, q2 +vrintn.f32 q0, q2 +vrintp.f16 q0, q2 +vrintp.f32 q0, q2 +vrintx.f16 q0, q2 +vrintx.f32 q0, q2 +vrintz.f16 q0, q2 +vrintz.f32 q0, q2 +vsub.f16 q0, q2, q1 +vsub.f32 q0, q2, q1 +vsub.f16 q0, q2, r0 +vsub.f32 q0, q2, r0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 2.00 vabd.f16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.f32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabs.f16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vabs.f32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vadd.f16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vadd.f32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vadd.f16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vadd.f32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vcadd.f16 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vcadd.f32 q0, q2, q1, #90 +# CHECK-NEXT: 1 2 2.00 vcmla.f16 q0, q2, q1, #90 +# CHECK-NEXT: 1 2 2.00 vcmla.f32 q0, q2, q1, #90 +# CHECK-NEXT: 1 2 2.00 vcmul.f16 q0, q2, q1, #90 +# CHECK-NEXT: 1 2 2.00 vcmul.f32 q0, q2, q1, #90 +# CHECK-NEXT: 1 2 2.00 vcvt.f16.s16 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.f16.u16 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.s16.f16 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.u16.f16 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.f32.s32 q0, 
q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.f32.u32 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.s32.f32 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.u32.f32 q0, q1, #4 +# CHECK-NEXT: 1 2 2.00 vcvt.f16.s16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.f32.s32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.f16.u16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.f32.u32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.s16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.s32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.u16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvt.u32.f32 q0, q1 +# CHECK-NEXT: 1 3 2.00 vcvtb.f16.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtb.f32.f16 q0, q1 +# CHECK-NEXT: 1 3 2.00 vcvtt.f16.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtt.f32.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvta.s16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvta.s32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvta.u16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvta.u32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtm.s16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtm.s32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtm.u16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtm.u32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtn.s16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtn.s32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtn.u16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtn.u32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtp.s16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtp.s32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtp.u16.f16 q0, q1 +# CHECK-NEXT: 1 2 2.00 vcvtp.u32.f32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vfma.f16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vfma.f32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vfma.f16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vfma.f32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vfms.f16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vfms.f32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vfmas.f16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vfmas.f32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vmaxnm.f16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmaxnm.f32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmaxnma.f16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vmaxnma.f32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vmaxnmv.f16 r0, q2 +# CHECK-NEXT: 1 1 
2.00 vmaxnmv.f32 r0, q2 +# CHECK-NEXT: 1 1 2.00 vmaxnmav.f16 r0, q2 +# CHECK-NEXT: 1 1 2.00 vmaxnmav.f32 r0, q2 +# CHECK-NEXT: 1 1 2.00 vminnm.f16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vminnm.f32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vminnma.f16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vminnma.f32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vminnmv.f16 r0, q2 +# CHECK-NEXT: 1 1 2.00 vminnmv.f32 r0, q2 +# CHECK-NEXT: 1 1 2.00 vminnmav.f16 r0, q2 +# CHECK-NEXT: 1 1 2.00 vminnmav.f32 r0, q2 +# CHECK-NEXT: 1 2 2.00 vmul.f16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmul.f32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmul.f16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmul.f32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vneg.f16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vneg.f32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrinta.f16 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrinta.f32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintm.f16 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintm.f32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintn.f16 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintn.f32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintp.f16 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintp.f32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintx.f16 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintx.f32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintz.f16 q0, q2 +# CHECK-NEXT: 1 2 2.00 vrintz.f32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vsub.f16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vsub.f32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vsub.f16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vsub.f32 q0, q2, r0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: - - - 192.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: - - - 2.00 - vabd.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vabd.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vabs.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vabs.f32 q0, q2 +# 
CHECK-NEXT: - - - 2.00 - vadd.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vadd.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vadd.f16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vadd.f32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vcadd.f16 q0, q2, q1, #90 +# CHECK-NEXT: - - - 2.00 - vcadd.f32 q0, q2, q1, #90 +# CHECK-NEXT: - - - 2.00 - vcmla.f16 q0, q2, q1, #90 +# CHECK-NEXT: - - - 2.00 - vcmla.f32 q0, q2, q1, #90 +# CHECK-NEXT: - - - 2.00 - vcmul.f16 q0, q2, q1, #90 +# CHECK-NEXT: - - - 2.00 - vcmul.f32 q0, q2, q1, #90 +# CHECK-NEXT: - - - 2.00 - vcvt.f16.s16 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.f16.u16 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.s16.f16 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.u16.f16 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.f32.s32 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.f32.u32 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.s32.f32 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.u32.f32 q0, q1, #4 +# CHECK-NEXT: - - - 2.00 - vcvt.f16.s16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.f32.s32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.f16.u16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.f32.u32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.s16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.s32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.u16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvt.u32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtb.f16.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtb.f32.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtt.f16.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtt.f32.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvta.s16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvta.s32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvta.u16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvta.u32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtm.s16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtm.s32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtm.u16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtm.u32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtn.s16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtn.s32.f32 q0, q1 
+# CHECK-NEXT: - - - 2.00 - vcvtn.u16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtn.u32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtp.s16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtp.s32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtp.u16.f16 q0, q1 +# CHECK-NEXT: - - - 2.00 - vcvtp.u32.f32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vfma.f16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vfma.f32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vfma.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vfma.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vfms.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vfms.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vfmas.f16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vfmas.f32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmaxnm.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmaxnm.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmaxnma.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vmaxnma.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vmaxnmv.f16 r0, q2 +# CHECK-NEXT: - - - 2.00 - vmaxnmv.f32 r0, q2 +# CHECK-NEXT: - - - 2.00 - vmaxnmav.f16 r0, q2 +# CHECK-NEXT: - - - 2.00 - vmaxnmav.f32 r0, q2 +# CHECK-NEXT: - - - 2.00 - vminnm.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vminnm.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vminnma.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vminnma.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vminnmv.f16 r0, q2 +# CHECK-NEXT: - - - 2.00 - vminnmv.f32 r0, q2 +# CHECK-NEXT: - - - 2.00 - vminnmav.f16 r0, q2 +# CHECK-NEXT: - - - 2.00 - vminnmav.f32 r0, q2 +# CHECK-NEXT: - - - 2.00 - vmul.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmul.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmul.f16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmul.f32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vneg.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vneg.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrinta.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrinta.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintm.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintm.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintn.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintn.f32 
q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintp.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintp.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintx.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintx.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintz.f16 q0, q2 +# CHECK-NEXT: - - - 2.00 - vrintz.f32 q0, q2 +# CHECK-NEXT: - - - 2.00 - vsub.f16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vsub.f32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vsub.f16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vsub.f32 q0, q2, r0 diff --git a/llvm/test/tools/llvm-mca/ARM/m55-mve-int.s b/llvm/test/tools/llvm-mca/ARM/m55-mve-int.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-mve-int.s @@ -0,0 +1,1566 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -instruction-tables < %s | FileCheck %s + +vabav.s8 r0, q2, q1 +vabav.u8 r0, q2, q1 +vabav.s16 r0, q2, q1 +vabav.u16 r0, q2, q1 +vabav.s32 r0, q2, q1 +vabav.u32 r0, q2, q1 +vabd.s8 q0, q2, q1 +vabd.u8 q0, q2, q1 +vabd.s16 q0, q2, q1 +vabd.u16 q0, q2, q1 +vabd.s32 q0, q2, q1 +vabd.u32 q0, q2, q1 +vabs.s8 q0, q2 +vabs.s16 q0, q2 +vabs.s32 q0, q2 +vadc.i32 q0, q2, q1 +vadci.i32 q0, q2, q1 +vadd.i8 q0, q2, q1 +vadd.i16 q0, q2, q1 +vadd.i32 q0, q2, q1 +vadd.i8 q0, q2, r0 +vadd.i16 q0, q2, r0 +vadd.i32 q0, q2, r0 +vaddlv.s32 r0, r1, q1 +vaddlv.u32 r0, r1, q1 +vaddlva.s32 r0, r1, q1 +vaddlva.u32 r0, r1, q1 +vaddv.s8 r0, q1 +vaddv.u8 r0, q1 +vaddv.s16 r0, q1 +vaddv.u16 r0, q1 +vaddv.s32 r0, q1 +vaddv.u32 r0, q1 +vaddva.s8 r0, q1 +vaddva.u8 r0, q1 +vaddva.s16 r0, q1 +vaddva.u16 r0, q1 +vaddva.s32 r0, q1 +vaddva.u32 r0, q1 +vand q0, q2, q1 +vbic.i16 q0, #10 +vbic.i32 q0, #10 +vbic q0, q2, q1 +vbrsr.8 q0, q2, r0 +vbrsr.16 q0, q2, r0 +vbrsr.32 q0, q2, r0 +vcadd.i8 q0, q2, q1, #90 +vcadd.i16 q0, q2, q1, #90 +vcadd.i32 q0, q2, q1, #90 +vcls.s8 q0, q2 +vcls.s16 q0, q2 +vcls.s32 q0, q2 +vclz.i8 q0, q2 +vclz.i16 q0, q2 +vclz.i32 q0, q2 +vdwdup.u8 q0, r0, r1, #4 
+vdwdup.u16 q0, r0, r1, #4 +vdwdup.u32 q0, r0, r1, #4 +vddup.u8 q0, r0, #4 +vddup.u16 q0, r0, #4 +vddup.u32 q0, r0, #4 +vdup.8 q0, r0 +vdup.16 q0, r0 +vdup.32 q0, r0 +veor q0, q2, q1 +vhadd.s8 q0, q2, q1 +vhadd.u8 q0, q2, q1 +vhadd.s16 q0, q2, q1 +vhadd.u16 q0, q2, q1 +vhadd.s32 q0, q2, q1 +vhadd.u32 q0, q2, q1 +vhadd.s8 q0, q2, r0 +vhadd.u8 q0, q2, r0 +vhadd.s16 q0, q2, r0 +vhadd.u16 q0, q2, r0 +vhadd.s32 q0, q2, r0 +vhadd.u32 q0, q2, r0 +vhcadd.s8 q0, q2, q1, #90 +vhcadd.s16 q0, q2, q1, #90 +vhcadd.s32 q0, q2, q1, #90 +vhsub.s8 q0, q2, q1 +vhsub.u8 q0, q2, q1 +vhsub.s16 q0, q2, q1 +vhsub.u16 q0, q2, q1 +vhsub.s32 q0, q2, q1 +vhsub.u32 q0, q2, q1 +vhsub.s8 q0, q2, r0 +vhsub.u8 q0, q2, r0 +vhsub.s16 q0, q2, r0 +vhsub.u16 q0, q2, r0 +vhsub.s32 q0, q2, r0 +vhsub.u32 q0, q2, r0 +viwdup.u8 q0, r0, r1, #4 +viwdup.u16 q0, r0, r1, #4 +viwdup.u32 q0, r0, r1, #4 +vidup.u8 q0, r0, #4 +vidup.u16 q0, r0, #4 +vidup.u32 q0, r0, #4 +vmax.s8 q0, q2, q1 +vmax.u8 q0, q2, q1 +vmax.s16 q0, q2, q1 +vmax.u16 q0, q2, q1 +vmax.s32 q0, q2, q1 +vmax.u32 q0, q2, q1 +vmaxa.s8 q0, q2 +vmaxa.s16 q0, q2 +vmaxa.s32 q0, q2 +vmaxv.s8 r0, q2 +vmaxv.u8 r0, q2 +vmaxv.s16 r0, q2 +vmaxv.u16 r0, q2 +vmaxv.s32 r0, q2 +vmaxv.u32 r0, q2 +vmaxav.s8 r0, q2 +vmaxav.s16 r0, q2 +vmaxav.s32 r0, q2 +vmin.s8 q0, q2, q1 +vmin.u8 q0, q2, q1 +vmin.s16 q0, q2, q1 +vmin.u16 q0, q2, q1 +vmin.s32 q0, q2, q1 +vmin.u32 q0, q2, q1 +vmina.s8 q0, q2 +vmina.s16 q0, q2 +vmina.s32 q0, q2 +vminv.s8 r0, q2 +vminv.u8 r0, q2 +vminv.s16 r0, q2 +vminv.u16 r0, q2 +vminv.s32 r0, q2 +vminv.u32 r0, q2 +vminav.s8 r0, q2 +vminav.s16 r0, q2 +vminav.s32 r0, q2 +vmla.i8 q0, q2, r0 +vmla.i16 q0, q2, r0 +vmla.i32 q0, q2, r0 +vmladav.s8 r0, q2, q1 +vmladav.u8 r0, q2, q1 +vmladav.s16 r0, q2, q1 +vmladav.u16 r0, q2, q1 +vmladav.s32 r0, q2, q1 +vmladav.u32 r0, q2, q1 +vmladava.s8 r0, q2, q1 +vmladava.u8 r0, q2, q1 +vmladava.s16 r0, q2, q1 +vmladava.u16 r0, q2, q1 +vmladava.s32 r0, q2, q1 +vmladava.u32 r0, q2, q1 +vmladavax.s8 r0, q2, q1 
+vmladavax.s16 r0, q2, q1 +vmladavax.s32 r0, q2, q1 +vmladavx.s8 r0, q2, q1 +vmladavx.s16 r0, q2, q1 +vmladavx.s32 r0, q2, q1 +vmlaldav.s16 r0, r1, q2, q1 +vmlaldav.u16 r0, r1, q2, q1 +vmlaldav.s32 r0, r1, q2, q1 +vmlaldav.u32 r0, r1, q2, q1 +vmlaldava.s16 r0, r1, q2, q1 +vmlaldava.u16 r0, r1, q2, q1 +vmlaldava.s32 r0, r1, q2, q1 +vmlaldava.u32 r0, r1, q2, q1 +vmlaldavax.s16 r0, r1, q2, q1 +vmlaldavax.s32 r0, r1, q2, q1 +vmlaldavx.s16 r0, r1, q2, q1 +vmlaldavx.s32 r0, r1, q2, q1 +vmlas.i8 q0, q2, r0 +vmlas.i16 q0, q2, r0 +vmlas.i32 q0, q2, r0 +vmlsdav.s8 r0, q2, q1 +vmlsdav.s16 r0, q2, q1 +vmlsdav.s32 r0, q2, q1 +vmlsdava.s8 r0, q2, q1 +vmlsdava.s16 r0, q2, q1 +vmlsdava.s32 r0, q2, q1 +vmlsdavax.s8 r0, q2, q1 +vmlsdavax.s16 r0, q2, q1 +vmlsdavax.s32 r0, q2, q1 +vmlsdavx.s8 r0, q2, q1 +vmlsdavx.s16 r0, q2, q1 +vmlsdavx.s32 r0, q2, q1 +vmlsldav.s16 r0, r1, q2, q1 +vmlsldav.s32 r0, r1, q2, q1 +vmlsldava.s16 r0, r1, q2, q1 +vmlsldava.s32 r0, r1, q2, q1 +vmlsldavax.s16 r0, r1, q2, q1 +vmlsldavax.s32 r0, r1, q2, q1 +vmlsldavx.s16 r0, r1, q2, q1 +vmlsldavx.s32 r0, r1, q2, q1 +vmov.8 q0[1], r0 +vmov.16 q0[1], r0 +vmov.32 q0[1], r0 +vmov.i8 q0, #0 +vmov.i16 q0, #0 +vmov.i32 q0, #0 +vmov.i64 q0, #0 +vmov.f32 q0, #1.0 +vmov r1, r2, q0[2], q0[0] +vmov q0[2], q0[0], r1, r2 +vmov.32 r0, q0[1] +vmov.s16 r0, q0[1] +vmov.u16 r0, q0[1] +vmov.s8 r0, q0[1] +vmov.u8 r0, q0[1] +vmovlb.s8 q0, q1 +vmovlb.u8 q0, q1 +vmovlb.s16 q0, q1 +vmovlb.u16 q0, q1 +vmovlt.s8 q0, q1 +vmovlt.u8 q0, q1 +vmovlt.s16 q0, q1 +vmovlt.u16 q0, q1 +vmovnb.i16 q0, q1 +vmovnb.i32 q0, q1 +vmovnt.i16 q0, q1 +vmovnt.i32 q0, q1 +vmul.i8 q0, q2, q1 +vmul.i16 q0, q2, q1 +vmul.i32 q0, q2, q1 +vmul.i8 q0, q2, r0 +vmul.i16 q0, q2, r0 +vmul.i32 q0, q2, r0 +vmulh.s8 q0, q2, q1 +vmulh.u8 q0, q2, q1 +vmulh.s16 q0, q2, q1 +vmulh.u16 q0, q2, q1 +vmulh.s32 q0, q2, q1 +vmulh.u32 q0, q2, q1 +vrmulh.s8 q0, q2, q1 +vrmulh.u8 q0, q2, q1 +vrmulh.s16 q0, q2, q1 +vrmulh.u16 q0, q2, q1 +vrmulh.s32 q0, q2, q1 +vrmulh.u32 q0, q2, q1 
+vmullb.s8 q0, q2, q1 +vmullb.u8 q0, q2, q1 +vmullb.s16 q0, q2, q1 +vmullb.u16 q0, q2, q1 +vmullb.s32 q0, q2, q1 +vmullb.u32 q0, q2, q1 +vmullt.s8 q0, q2, q1 +vmullt.u8 q0, q2, q1 +vmullt.s16 q0, q2, q1 +vmullt.u16 q0, q2, q1 +vmullt.s32 q0, q2, q1 +vmullt.u32 q0, q2, q1 +vmullb.p8 q0, q2, q1 +vmullb.p16 q0, q2, q1 +vmullt.p8 q0, q2, q1 +vmullt.p16 q0, q2, q1 +vmvn.i16 q0, #10 +vmvn.i32 q0, #10 +vmvn q0, q2 +vneg.s8 q0, q2 +vneg.s16 q0, q2 +vneg.s32 q0, q2 +vorn q0, q2, q1 +vorr.i16 q0, #10 +vorr.i32 q0, #10 +vorr q0, q2, q1 +vpsel q0, q2, q1 +vqabs.s8 q0, q2 +vqabs.s16 q0, q2 +vqabs.s32 q0, q2 +vqadd.s8 q0, q2, q1 +vqadd.u8 q0, q2, q1 +vqadd.s16 q0, q2, q1 +vqadd.u16 q0, q2, q1 +vqadd.s32 q0, q2, q1 +vqadd.u32 q0, q2, q1 +vqadd.s8 q0, q2, r0 +vqadd.u8 q0, q2, r0 +vqadd.s16 q0, q2, r0 +vqadd.u16 q0, q2, r0 +vqadd.s32 q0, q2, r0 +vqadd.u32 q0, q2, r0 +vqdmladh.s8 q0, q2, q1 +vqdmladh.s16 q0, q2, q1 +vqdmladh.s32 q0, q2, q1 +vqdmladhx.s8 q0, q2, q1 +vqdmladhx.s16 q0, q2, q1 +vqdmladhx.s32 q0, q2, q1 +vqrdmladh.s8 q0, q2, q1 +vqrdmladh.s16 q0, q2, q1 +vqrdmladh.s32 q0, q2, q1 +vqrdmladhx.s8 q0, q2, q1 +vqrdmladhx.s16 q0, q2, q1 +vqrdmladhx.s32 q0, q2, q1 +vqdmlah.s8 q0, q2, r0 +vqdmlah.s16 q0, q2, r0 +vqdmlah.s32 q0, q2, r0 +vqrdmlah.s8 q0, q2, r0 +vqrdmlah.s16 q0, q2, r0 +vqrdmlah.s32 q0, q2, r0 +vqdmlash.s8 q0, q2, r0 +vqdmlash.s16 q0, q2, r0 +vqdmlash.s32 q0, q2, r0 +vqrdmlash.s8 q0, q2, r0 +vqrdmlash.s16 q0, q2, r0 +vqrdmlash.s32 q0, q2, r0 +vqdmlsdh.s8 q0, q2, q1 +vqdmlsdh.s16 q0, q2, q1 +vqdmlsdh.s32 q0, q2, q1 +vqdmlsdhx.s8 q0, q2, q1 +vqdmlsdhx.s16 q0, q2, q1 +vqdmlsdhx.s32 q0, q2, q1 +vqrdmlsdh.s8 q0, q2, q1 +vqrdmlsdh.s16 q0, q2, q1 +vqrdmlsdh.s32 q0, q2, q1 +vqrdmlsdhx.s8 q0, q2, q1 +vqrdmlsdhx.s16 q0, q2, q1 +vqrdmlsdhx.s32 q0, q2, q1 +vqdmulh.s8 q0, q2, q1 +vqdmulh.s16 q0, q2, q1 +vqdmulh.s32 q0, q2, q1 +vqrdmulh.s8 q0, q2, q1 +vqrdmulh.s16 q0, q2, q1 +vqrdmulh.s32 q0, q2, q1 +vqdmulh.s8 q0, q2, r0 +vqdmulh.s16 q0, q2, r0 +vqdmulh.s32 q0, q2, r0 
+vqrdmulh.s8 q0, q2, r0 +vqrdmulh.s16 q0, q2, r0 +vqrdmulh.s32 q0, q2, r0 +vqdmullt.s16 q0, q2, q1 +vqdmullt.s32 q0, q2, q1 +vqdmullb.s16 q0, q2, r0 +vqdmullb.s32 q0, q2, r0 +vqmovnt.s16 q0, q2 +vqmovnt.u16 q0, q2 +vqmovnt.s32 q0, q2 +vqmovnt.u32 q0, q2 +vqmovnb.s16 q0, q2 +vqmovnb.u16 q0, q2 +vqmovnb.s32 q0, q2 +vqmovnb.u32 q0, q2 +vqmovunt.s16 q0, q2 +vqmovunt.s32 q0, q2 +vqmovunb.s16 q0, q2 +vqmovunb.s32 q0, q2 +vqneg.s8 q0, q2 +vqneg.s16 q0, q2 +vqneg.s32 q0, q2 +vqrshl.s8 q0, q2, q1 +vqrshl.u8 q0, q2, q1 +vqrshl.s16 q0, q2, q1 +vqrshl.u16 q0, q2, q1 +vqrshl.s32 q0, q2, q1 +vqrshl.u32 q0, q2, q1 +vqrshl.s8 q0, r0 +vqrshl.u8 q0, r0 +vqrshl.s16 q0, r0 +vqrshl.u16 q0, r0 +vqrshl.s32 q0, r0 +vqrshl.u32 q0, r0 +vqrshrnb.s16 q0, q2, #5 +vqrshrnb.u16 q0, q2, #5 +vqrshrnb.s32 q0, q2, #5 +vqrshrnb.u32 q0, q2, #5 +vqrshrnt.s16 q0, q2, #5 +vqrshrnt.u16 q0, q2, #5 +vqrshrnt.s32 q0, q2, #5 +vqrshrnt.u32 q0, q2, #5 +vqrshrunb.s16 q0, q2, #5 +vqrshrunb.s32 q0, q2, #5 +vqrshrunt.s16 q0, q2, #5 +vqrshrunt.s32 q0, q2, #5 +vqshl.s8 q0, r0 +vqshl.u8 q0, r0 +vqshl.s16 q0, r0 +vqshl.u16 q0, r0 +vqshl.s32 q0, r0 +vqshl.u32 q0, r0 +vqshl.s8 q0, q2, #5 +vqshl.u8 q0, q2, #5 +vqshl.s16 q0, q2, #5 +vqshl.u16 q0, q2, #5 +vqshl.s32 q0, q2, #5 +vqshl.u32 q0, q2, #5 +vqshlu.s8 q0, q2, #5 +vqshlu.s16 q0, q2, #5 +vqshlu.s32 q0, q2, #5 +vqshl.s8 q0, q2, q1 +vqshl.u8 q0, q2, q1 +vqshl.s16 q0, q2, q1 +vqshl.u16 q0, q2, q1 +vqshl.s32 q0, q2, q1 +vqshl.u32 q0, q2, q1 +vqshrnb.s16 q0, q2, #5 +vqshrnb.u16 q0, q2, #5 +vqshrnb.s32 q0, q2, #5 +vqshrnb.u32 q0, q2, #5 +vqshrnt.s16 q0, q2, #5 +vqshrnt.u16 q0, q2, #5 +vqshrnt.s32 q0, q2, #5 +vqshrnt.u32 q0, q2, #5 +vqshrunb.s16 q0, q2, #5 +vqshrunb.s32 q0, q2, #5 +vqshrunt.s16 q0, q2, #5 +vqshrunt.s32 q0, q2, #5 +vqsub.s8 q0, q2, q1 +vqsub.u8 q0, q2, q1 +vqsub.s16 q0, q2, q1 +vqsub.u16 q0, q2, q1 +vqsub.s32 q0, q2, q1 +vqsub.u32 q0, q2, q1 +vqsub.s8 q0, q2, r0 +vqsub.u8 q0, q2, r0 +vqsub.s16 q0, q2, r0 +vqsub.u16 q0, q2, r0 +vqsub.s32 q0, q2, r0 +vqsub.u32 
q0, q2, r0 +vrev16.8 q0, q2 +vrev32.8 q0, q2 +vrev32.16 q0, q2 +vrev64.8 q0, q2 +vrev64.16 q0, q2 +vrev64.32 q0, q2 +vrhadd.s8 q0, q2, q1 +vrhadd.u8 q0, q2, q1 +vrhadd.s16 q0, q2, q1 +vrhadd.u16 q0, q2, q1 +vrhadd.s32 q0, q2, q1 +vrhadd.u32 q0, q2, q1 +vrmlaldavh.s32 r0, r1, q2, q1 +vrmlaldavh.u32 r0, r1, q2, q1 +vrmlaldavha.s32 r0, r1, q2, q1 +vrmlaldavha.u32 r0, r1, q2, q1 +vrmlaldavhx.s32 r0, r1, q2, q1 +vrmlaldavhax.s32 r0, r1, q2, q1 +vrmlsldavh.s32 r0, r1, q2, q1 +vrmlsldavha.s32 r0, r1, q2, q1 +vrmlsldavhx.s32 r0, r1, q2, q1 +vrmlsldavhax.s32 r0, r1, q2, q1 +vrshl.s8 q0, q2, q1 +vrshl.u8 q0, q2, q1 +vrshl.s16 q0, q2, q1 +vrshl.u16 q0, q2, q1 +vrshl.s32 q0, q2, q1 +vrshl.u32 q0, q2, q1 +vrshl.s8 q0, r0 +vrshl.u8 q0, r0 +vrshl.s16 q0, r0 +vrshl.u16 q0, r0 +vrshl.s32 q0, r0 +vrshl.u32 q0, r0 +vrshr.s8 q0, q2, #5 +vrshr.u8 q0, q2, #5 +vrshr.s16 q0, q2, #5 +vrshr.u16 q0, q2, #5 +vrshr.s32 q0, q2, #5 +vrshr.u32 q0, q2, #5 +vrshrnb.i16 q0, q2, #5 +vrshrnb.i32 q0, q2, #5 +vrshrnt.i16 q0, q2, #5 +vrshrnt.i32 q0, q2, #5 +vsbc.i32 q0, q2, q1 +vsbci.i32 q0, q2, q1 +vshl.i8 q0, q2, #1 +vshl.i16 q0, q2, #1 +vshl.i32 q0, q2, #1 +vshl.s8 q0, r0 +vshl.u8 q0, r0 +vshl.s16 q0, r0 +vshl.u16 q0, r0 +vshl.s32 q0, r0 +vshl.u32 q0, r0 +vshl.s8 q0, q2, q1 +vshl.u8 q0, q2, q1 +vshl.s16 q0, q2, q1 +vshl.u16 q0, q2, q1 +vshl.s32 q0, q2, q1 +vshl.u32 q0, q2, q1 +vshlc q0, r0, #5 +vshllt.s8 q0, q2, #5 +vshllt.u8 q0, q2, #5 +vshllt.s16 q0, q2, #5 +vshllt.u16 q0, q2, #5 +vshllb.s8 q0, q2, #5 +vshllb.u8 q0, q2, #5 +vshllb.s16 q0, q2, #5 +vshllb.u16 q0, q2, #5 +vshllt.s8 q0, q2, #8 +vshllt.u8 q0, q2, #8 +vshllt.s16 q0, q2, #16 +vshllt.u16 q0, q2, #16 +vshllb.s8 q0, q2, #8 +vshllb.u8 q0, q2, #8 +vshllb.s16 q0, q2, #16 +vshllb.u16 q0, q2, #16 +vshr.s8 q0, q2, #5 +vshr.u8 q0, q2, #5 +vshr.s16 q0, q2, #5 +vshr.u16 q0, q2, #5 +vshr.s32 q0, q2, #5 +vshr.u32 q0, q2, #5 +vshrnb.i16 q0, q2, #5 +vshrnb.i32 q0, q2, #5 +vshrnt.i16 q0, q2, #5 +vshrnt.i32 q0, q2, #5 +vsli.8 q0, q2, #5 +vsli.16 q0, q2, #5 
+vsli.32 q0, q2, #5 +vsri.8 q0, q2, #5 +vsri.16 q0, q2, #5 +vsri.32 q0, q2, #5 +vsub.i8 q0, q2, q1 +vsub.i16 q0, q2, q1 +vsub.i32 q0, q2, q1 +vsub.i8 q0, q2, r0 +vsub.i16 q0, q2, r0 +vsub.i32 q0, q2, r0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 2.00 vabav.s8 r0, q2, q1 +# CHECK-NEXT: 1 3 2.00 vabav.u8 r0, q2, q1 +# CHECK-NEXT: 1 3 2.00 vabav.s16 r0, q2, q1 +# CHECK-NEXT: 1 3 2.00 vabav.u16 r0, q2, q1 +# CHECK-NEXT: 1 3 2.00 vabav.s32 r0, q2, q1 +# CHECK-NEXT: 1 3 2.00 vabav.u32 r0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabd.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vabs.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vabs.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vabs.s32 q0, q2 +# CHECK-NEXT: 1 2 2.00 U vadc.i32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 U vadci.i32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vadd.i8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vadd.i16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vadd.i32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vadd.i8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vadd.i16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vadd.i32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vaddlv.s32 r0, r1, q1 +# CHECK-NEXT: 1 2 2.00 vaddlv.u32 r0, r1, q1 +# CHECK-NEXT: 1 2 2.00 vaddlva.s32 r0, r1, q1 +# CHECK-NEXT: 1 2 2.00 vaddlva.u32 r0, r1, q1 +# CHECK-NEXT: 1 2 2.00 vaddv.s8 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddv.u8 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddv.s16 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddv.u16 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddv.s32 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddv.u32 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddva.s8 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddva.u8 r0, q1 +# CHECK-NEXT: 1 2 
2.00 vaddva.s16 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddva.u16 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddva.s32 r0, q1 +# CHECK-NEXT: 1 2 2.00 vaddva.u32 r0, q1 +# CHECK-NEXT: 1 1 2.00 vand q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vbic.i16 q0, #0xa +# CHECK-NEXT: 1 1 2.00 vbic.i32 q0, #0xa +# CHECK-NEXT: 1 1 2.00 vbic q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vbrsr.8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vbrsr.16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vbrsr.32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vcadd.i8 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vcadd.i16 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vcadd.i32 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vcls.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vcls.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vcls.s32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vclz.i8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vclz.i16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vclz.i32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vdwdup.u8 q0, r0, r1, #4 +# CHECK-NEXT: 1 1 2.00 vdwdup.u16 q0, r0, r1, #4 +# CHECK-NEXT: 1 1 2.00 vdwdup.u32 q0, r0, r1, #4 +# CHECK-NEXT: 1 1 2.00 vddup.u8 q0, r0, #4 +# CHECK-NEXT: 1 1 2.00 vddup.u16 q0, r0, #4 +# CHECK-NEXT: 1 1 2.00 vddup.u32 q0, r0, #4 +# CHECK-NEXT: 1 1 2.00 vdup.8 q0, r0 +# CHECK-NEXT: 1 1 2.00 vdup.16 q0, r0 +# CHECK-NEXT: 1 1 2.00 vdup.32 q0, r0 +# CHECK-NEXT: 1 1 2.00 veor q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhadd.s8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhadd.u8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhadd.s16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhadd.u16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhadd.s32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhadd.u32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhcadd.s8 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vhcadd.s16 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vhcadd.s32 q0, q2, q1, #90 +# CHECK-NEXT: 1 1 2.00 vhsub.s8 q0, q2, 
q1 +# CHECK-NEXT: 1 1 2.00 vhsub.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhsub.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhsub.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhsub.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhsub.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vhsub.s8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhsub.u8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhsub.s16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhsub.u16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhsub.s32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vhsub.u32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 viwdup.u8 q0, r0, r1, #4 +# CHECK-NEXT: 1 1 2.00 viwdup.u16 q0, r0, r1, #4 +# CHECK-NEXT: 1 1 2.00 viwdup.u32 q0, r0, r1, #4 +# CHECK-NEXT: 1 1 2.00 vidup.u8 q0, r0, #4 +# CHECK-NEXT: 1 1 2.00 vidup.u16 q0, r0, #4 +# CHECK-NEXT: 1 1 2.00 vidup.u32 q0, r0, #4 +# CHECK-NEXT: 1 1 2.00 vmax.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmax.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmax.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmax.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmax.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmax.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmaxa.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vmaxa.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vmaxa.s32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vmaxv.s8 r0, q2 +# CHECK-NEXT: 1 2 2.00 vmaxv.u8 r0, q2 +# CHECK-NEXT: 1 3 2.00 vmaxv.s16 r0, q2 +# CHECK-NEXT: 1 3 2.00 vmaxv.u16 r0, q2 +# CHECK-NEXT: 1 4 2.00 vmaxv.s32 r0, q2 +# CHECK-NEXT: 1 4 2.00 vmaxv.u32 r0, q2 +# CHECK-NEXT: 1 2 2.00 vmaxav.s8 r0, q2 +# CHECK-NEXT: 1 3 2.00 vmaxav.s16 r0, q2 +# CHECK-NEXT: 1 4 2.00 vmaxav.s32 r0, q2 +# CHECK-NEXT: 1 1 2.00 vmin.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmin.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmin.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmin.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmin.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmin.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmina.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vmina.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vmina.s32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vminv.s8 r0, q2 +# CHECK-NEXT: 1 2 2.00 vminv.u8 r0, q2 +# 
CHECK-NEXT: 1 3 2.00 vminv.s16 r0, q2 +# CHECK-NEXT: 1 3 2.00 vminv.u16 r0, q2 +# CHECK-NEXT: 1 4 2.00 vminv.s32 r0, q2 +# CHECK-NEXT: 1 4 2.00 vminv.u32 r0, q2 +# CHECK-NEXT: 1 2 2.00 vminav.s8 r0, q2 +# CHECK-NEXT: 1 3 2.00 vminav.s16 r0, q2 +# CHECK-NEXT: 1 4 2.00 vminav.s32 r0, q2 +# CHECK-NEXT: 1 2 2.00 vmla.i8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmla.i16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmla.i32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmlav.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlav.u8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlav.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlav.u16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlav.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlav.u32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlava.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlava.u8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlava.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlava.u16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlava.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlava.u32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmladavax.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmladavax.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmladavax.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmladavx.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmladavx.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmladavx.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalv.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalv.u16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalv.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalv.u32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalva.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalva.u16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalva.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlalva.u32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlaldavax.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlaldavax.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlaldavx.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlaldavx.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlas.i8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmlas.i16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmlas.i32 q0, q2, r0 
+# CHECK-NEXT: 1 2 2.00 vmlsdav.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdav.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdav.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdava.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdava.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdava.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdavax.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdavax.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdavax.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdavx.s8 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdavx.s16 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsdavx.s32 r0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldav.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldav.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldava.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldava.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldavax.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldavax.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldavx.s16 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmlsldavx.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 1 1.00 vmov.8 q0[1], r0 +# CHECK-NEXT: 1 1 1.00 vmov.16 q0[1], r0 +# CHECK-NEXT: 1 1 1.00 vmov.32 q0[1], r0 +# CHECK-NEXT: 1 2 2.00 vmov.i8 q0, #0x0 +# CHECK-NEXT: 1 2 2.00 vmov.i16 q0, #0x0 +# CHECK-NEXT: 1 2 2.00 vmov.i32 q0, #0x0 +# CHECK-NEXT: 1 2 2.00 vmov.i64 q0, #0x0 +# CHECK-NEXT: 1 2 2.00 vmov.f32 q0, #1.000000e+00 +# CHECK-NEXT: 1 1 2.00 vmov r1, r2, q0[2], q0[0] +# CHECK-NEXT: 1 1 1.00 vmov q0[2], q0[0], r1, r2 +# CHECK-NEXT: 1 1 2.00 vmov.32 r0, q0[1] +# CHECK-NEXT: 1 1 2.00 vmov.s16 r0, q0[1] +# CHECK-NEXT: 1 1 2.00 vmov.u16 r0, q0[1] +# CHECK-NEXT: 1 1 2.00 vmov.s8 r0, q0[1] +# CHECK-NEXT: 1 1 2.00 vmov.u8 r0, q0[1] +# CHECK-NEXT: 1 1 2.00 vmovlb.s8 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlb.u8 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlb.s16 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlb.u16 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlt.s8 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlt.u8 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlt.s16 q0, q1 +# CHECK-NEXT: 1 1 2.00 vmovlt.u16 q0, q1 +# CHECK-NEXT: 1 
3 2.00 vmovnb.i16 q0, q1 +# CHECK-NEXT: 1 3 2.00 vmovnb.i32 q0, q1 +# CHECK-NEXT: 1 3 2.00 vmovnt.i16 q0, q1 +# CHECK-NEXT: 1 3 2.00 vmovnt.i32 q0, q1 +# CHECK-NEXT: 1 2 2.00 vmul.i8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmul.i16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmul.i32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmul.i8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmul.i16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmul.i32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vmulh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmulh.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmulh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmulh.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmulh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmulh.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmulh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmulh.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmulh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmulh.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmulh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmulh.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.p8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullb.p16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.p8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vmullt.p16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vmvn.i16 q0, #0xa +# CHECK-NEXT: 1 1 2.00 vmvn.i32 q0, #0xa +# CHECK-NEXT: 1 1 2.00 vmvn q0, q2 +# CHECK-NEXT: 1 1 2.00 vneg.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vneg.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vneg.s32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vorn q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorr.i16 
q0, #0xa +# CHECK-NEXT: 1 1 2.00 vorr.i32 q0, #0xa +# CHECK-NEXT: 1 1 2.00 vorr q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vpsel q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqabs.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vqabs.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vqabs.s32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vqadd.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqadd.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqadd.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqadd.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqadd.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqadd.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqadd.s8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqadd.u8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqadd.s16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqadd.u16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqadd.s32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqadd.u32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmladh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmladh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmladh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmladhx.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmladhx.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmladhx.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmladh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmladh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmladh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmladhx.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmladhx.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmladhx.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmlah.s8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmlah.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmlah.s32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmlah.s8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmlah.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmlah.s32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmlash.s8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmlash.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmlash.s32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmlash.s8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmlash.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmlash.s32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmlsdh.s8 q0, q2, q1 +# 
CHECK-NEXT: 1 2 2.00 vqdmlsdh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmlsdh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmlsdhx.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmlsdhx.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmlsdhx.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmlsdh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmlsdh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmlsdh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmlsdhx.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmlsdhx.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmlsdhx.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmulh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmulh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmulh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmulh.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmulh.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrdmulh.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmulh.s8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmulh.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmulh.s32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmulh.s8 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmulh.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqrdmulh.s32 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmullt.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmullt.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqdmullb.s16 q0, q2, r0 +# CHECK-NEXT: 1 2 2.00 vqdmullb.s32 q0, q2, r0 +# CHECK-NEXT: 1 3 2.00 vqmovnt.s16 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnt.u16 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnt.s32 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnt.u32 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnb.s16 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnb.u16 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnb.s32 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovnb.u32 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovunt.s16 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovunt.s32 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovunb.s16 q0, q2 +# CHECK-NEXT: 1 3 2.00 vqmovunb.s32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vqneg.s8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vqneg.s16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vqneg.s32 q0, q2 +# CHECK-NEXT: 1 2 2.00 vqrshl.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 
2.00 vqrshl.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrshl.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrshl.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrshl.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrshl.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqrshl.s8 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqrshl.u8 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqrshl.s16 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqrshl.u16 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqrshl.s32 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqrshl.u32 q0, r0 +# CHECK-NEXT: 1 3 2.00 vqrshrnb.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnb.u16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnb.s32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnb.u32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnt.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnt.u16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnt.s32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrnt.u32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrunb.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrunb.s32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrunt.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqrshrunt.s32 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.s8 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqshl.u8 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqshl.s16 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqshl.u16 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqshl.s32 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqshl.u32 q0, r0 +# CHECK-NEXT: 1 2 2.00 vqshl.s8 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.u8 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.s16 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.u16 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.s32 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.u32 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshlu.s8 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshlu.s16 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshlu.s32 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vqshl.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqshl.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqshl.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqshl.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqshl.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vqshl.u32 q0, q2, q1 +# CHECK-NEXT: 1 3 2.00 vqshrnb.s16 q0, 
q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnb.u16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnb.s32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnb.u32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnt.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnt.u16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnt.s32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrnt.u32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrunb.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrunb.s32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrunt.s16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vqshrunt.s32 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vqsub.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqsub.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqsub.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqsub.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqsub.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqsub.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vqsub.s8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqsub.u8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqsub.s16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqsub.u16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqsub.s32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vqsub.u32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vrev16.8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vrev32.8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vrev32.16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vrev64.8 q0, q2 +# CHECK-NEXT: 1 1 2.00 vrev64.16 q0, q2 +# CHECK-NEXT: 1 1 2.00 vrev64.32 q0, q2 +# CHECK-NEXT: 1 1 2.00 vrhadd.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vrhadd.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vrhadd.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vrhadd.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vrhadd.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vrhadd.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlalvh.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlalvh.u32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlalvha.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlalvha.u32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlaldavhx.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlaldavhax.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlsldavh.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlsldavha.s32 r0, 
r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlsldavhx.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrmlsldavhax.s32 r0, r1, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.s8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.u8 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.s16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.u16 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.s32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.u32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 vrshl.s8 q0, r0 +# CHECK-NEXT: 1 2 2.00 vrshl.u8 q0, r0 +# CHECK-NEXT: 1 2 2.00 vrshl.s16 q0, r0 +# CHECK-NEXT: 1 2 2.00 vrshl.u16 q0, r0 +# CHECK-NEXT: 1 2 2.00 vrshl.s32 q0, r0 +# CHECK-NEXT: 1 2 2.00 vrshl.u32 q0, r0 +# CHECK-NEXT: 1 2 2.00 vrshr.s8 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vrshr.u8 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vrshr.s16 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vrshr.u16 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vrshr.s32 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 vrshr.u32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vrshrnb.i16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vrshrnb.i32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vrshrnt.i16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vrshrnt.i32 q0, q2, #5 +# CHECK-NEXT: 1 2 2.00 U vsbc.i32 q0, q2, q1 +# CHECK-NEXT: 1 2 2.00 U vsbci.i32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vshl.i8 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 vshl.i16 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 vshl.i32 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 vshl.s8 q0, r0 +# CHECK-NEXT: 1 1 2.00 vshl.u8 q0, r0 +# CHECK-NEXT: 1 1 2.00 vshl.s16 q0, r0 +# CHECK-NEXT: 1 1 2.00 vshl.u16 q0, r0 +# CHECK-NEXT: 1 1 2.00 vshl.s32 q0, r0 +# CHECK-NEXT: 1 1 2.00 vshl.u32 q0, r0 +# CHECK-NEXT: 1 1 2.00 vshl.s8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vshl.u8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vshl.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vshl.u16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vshl.s32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vshl.u32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 U vshlc q0, r0, #5 +# CHECK-NEXT: 1 1 2.00 vshllt.s8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllt.u8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllt.s16 q0, q2, #5 +# 
CHECK-NEXT: 1 1 2.00 vshllt.u16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllb.s8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllb.u8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllb.s16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllb.u16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshllt.s8 q0, q2, #8 +# CHECK-NEXT: 1 1 2.00 vshllt.u8 q0, q2, #8 +# CHECK-NEXT: 1 1 2.00 vshllt.s16 q0, q2, #16 +# CHECK-NEXT: 1 1 2.00 vshllt.u16 q0, q2, #16 +# CHECK-NEXT: 1 1 2.00 vshllb.s8 q0, q2, #8 +# CHECK-NEXT: 1 1 2.00 vshllb.u8 q0, q2, #8 +# CHECK-NEXT: 1 1 2.00 vshllb.s16 q0, q2, #16 +# CHECK-NEXT: 1 1 2.00 vshllb.u16 q0, q2, #16 +# CHECK-NEXT: 1 1 2.00 vshr.s8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshr.u8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshr.s16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshr.u16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshr.s32 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vshr.u32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vshrnb.i16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vshrnb.i32 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vshrnt.i16 q0, q2, #5 +# CHECK-NEXT: 1 3 2.00 vshrnt.i32 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsli.8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsli.16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsli.32 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsri.8 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsri.16 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsri.32 q0, q2, #5 +# CHECK-NEXT: 1 1 2.00 vsub.i8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vsub.i16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vsub.i32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 vsub.i8 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vsub.i16 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 vsub.i32 q0, q2, r0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: - - 672.00 354.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: - - 2.00 - - 
vabav.s8 r0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabav.u8 r0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabav.s16 r0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabav.u16 r0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabav.s32 r0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabav.u32 r0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabd.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabd.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabd.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabd.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabd.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabd.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vabs.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vabs.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vabs.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vadc.i32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vadci.i32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vadd.i8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vadd.i16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vadd.i32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vadd.i8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vadd.i16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vadd.i32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vaddlv.s32 r0, r1, q1 +# CHECK-NEXT: - - - 2.00 - vaddlv.u32 r0, r1, q1 +# CHECK-NEXT: - - - 2.00 - vaddlva.s32 r0, r1, q1 +# CHECK-NEXT: - - - 2.00 - vaddlva.u32 r0, r1, q1 +# CHECK-NEXT: - - - 2.00 - vaddv.s8 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddv.u8 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddv.s16 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddv.u16 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddv.s32 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddv.u32 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddva.s8 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddva.u8 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddva.s16 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddva.u16 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddva.s32 r0, q1 +# CHECK-NEXT: - - - 2.00 - vaddva.u32 r0, q1 +# CHECK-NEXT: - - 2.00 - - vand q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vbic.i16 q0, #0xa +# CHECK-NEXT: - - 2.00 - - vbic.i32 q0, #0xa +# CHECK-NEXT: - - 2.00 - - vbic q0, q2, q1 +# 
CHECK-NEXT: - - 2.00 - - vbrsr.8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vbrsr.16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vbrsr.32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vcadd.i8 q0, q2, q1, #90 +# CHECK-NEXT: - - 2.00 - - vcadd.i16 q0, q2, q1, #90 +# CHECK-NEXT: - - 2.00 - - vcadd.i32 q0, q2, q1, #90 +# CHECK-NEXT: - - 2.00 - - vcls.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vcls.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vcls.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vclz.i8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vclz.i16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vclz.i32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vdwdup.u8 q0, r0, r1, #4 +# CHECK-NEXT: - - 2.00 - - vdwdup.u16 q0, r0, r1, #4 +# CHECK-NEXT: - - 2.00 - - vdwdup.u32 q0, r0, r1, #4 +# CHECK-NEXT: - - 2.00 - - vddup.u8 q0, r0, #4 +# CHECK-NEXT: - - 2.00 - - vddup.u16 q0, r0, #4 +# CHECK-NEXT: - - 2.00 - - vddup.u32 q0, r0, #4 +# CHECK-NEXT: - - 2.00 - - vdup.8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vdup.16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vdup.32 q0, r0 +# CHECK-NEXT: - - 2.00 - - veor q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhadd.s8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhadd.u8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhadd.s16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhadd.u16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhadd.s32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhadd.u32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhcadd.s8 q0, q2, q1, #90 +# CHECK-NEXT: - - 2.00 - - vhcadd.s16 q0, q2, q1, #90 +# CHECK-NEXT: - - 2.00 - - vhcadd.s32 q0, q2, q1, #90 +# CHECK-NEXT: - - 2.00 - - vhsub.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhsub.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhsub.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhsub.u16 q0, q2, q1 +# CHECK-NEXT: 
- - 2.00 - - vhsub.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhsub.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vhsub.s8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhsub.u8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhsub.s16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhsub.u16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhsub.s32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vhsub.u32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - viwdup.u8 q0, r0, r1, #4 +# CHECK-NEXT: - - 2.00 - - viwdup.u16 q0, r0, r1, #4 +# CHECK-NEXT: - - 2.00 - - viwdup.u32 q0, r0, r1, #4 +# CHECK-NEXT: - - 2.00 - - vidup.u8 q0, r0, #4 +# CHECK-NEXT: - - 2.00 - - vidup.u16 q0, r0, #4 +# CHECK-NEXT: - - 2.00 - - vidup.u32 q0, r0, #4 +# CHECK-NEXT: - - 2.00 - - vmax.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmax.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmax.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmax.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmax.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmax.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmaxa.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxa.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxa.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxv.s8 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxv.u8 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxv.s16 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxv.u16 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxv.s32 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxv.u32 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxav.s8 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxav.s16 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmaxav.s32 r0, q2 +# CHECK-NEXT: - - 2.00 - - vmin.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmin.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmin.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmin.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmin.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmin.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmina.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vmina.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vmina.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vminv.s8 r0, q2 +# CHECK-NEXT: - - 2.00 - - 
vminv.u8 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminv.s16 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminv.u16 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminv.s32 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminv.u32 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminav.s8 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminav.s16 r0, q2 +# CHECK-NEXT: - - 2.00 - - vminav.s32 r0, q2 +# CHECK-NEXT: - - - 2.00 - vmla.i8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmla.i16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmla.i32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmlav.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlav.u8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlav.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlav.u16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlav.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlav.u32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlava.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlava.u8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlava.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlava.u16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlava.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlava.u32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmladavax.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmladavax.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmladavax.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmladavx.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmladavx.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmladavx.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalv.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalv.u16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalv.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalv.u32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalva.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalva.u16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalva.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlalva.u32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlaldavax.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlaldavax.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlaldavx.s16 r0, r1, q2, q1 +# 
CHECK-NEXT: - - - 2.00 - vmlaldavx.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlas.i8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmlas.i16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmlas.i32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmlsdav.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdav.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdav.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdava.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdava.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdava.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdavax.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdavax.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdavax.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdavx.s8 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdavx.s16 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsdavx.s32 r0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldav.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldav.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldava.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldava.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldavax.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldavax.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldavx.s16 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmlsldavx.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - 1.00 1.00 - vmov.8 q0[1], r0 +# CHECK-NEXT: - - 1.00 1.00 - vmov.16 q0[1], r0 +# CHECK-NEXT: - - 1.00 1.00 - vmov.32 q0[1], r0 +# CHECK-NEXT: - - - 2.00 - vmov.i8 q0, #0x0 +# CHECK-NEXT: - - - 2.00 - vmov.i16 q0, #0x0 +# CHECK-NEXT: - - - 2.00 - vmov.i32 q0, #0x0 +# CHECK-NEXT: - - - 2.00 - vmov.i64 q0, #0x0 +# CHECK-NEXT: - - - 2.00 - vmov.f32 q0, #1.000000e+00 +# CHECK-NEXT: - - - 2.00 - vmov r1, r2, q0[2], q0[0] +# CHECK-NEXT: - - 1.00 1.00 - vmov q0[2], q0[0], r1, r2 +# CHECK-NEXT: - - - 2.00 - vmov.32 r0, q0[1] +# CHECK-NEXT: - - - 2.00 - vmov.s16 r0, q0[1] +# CHECK-NEXT: - - - 2.00 - vmov.u16 r0, q0[1] +# CHECK-NEXT: - - - 2.00 - vmov.s8 r0, q0[1] +# CHECK-NEXT: - - - 2.00 - vmov.u8 r0, 
q0[1] +# CHECK-NEXT: - - 2.00 - - vmovlb.s8 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlb.u8 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlb.s16 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlb.u16 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlt.s8 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlt.u8 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlt.s16 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovlt.u16 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovnb.i16 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovnb.i32 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovnt.i16 q0, q1 +# CHECK-NEXT: - - 2.00 - - vmovnt.i32 q0, q1 +# CHECK-NEXT: - - - 2.00 - vmul.i8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmul.i16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmul.i32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmul.i8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmul.i16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmul.i32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vmulh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmulh.u8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmulh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmulh.u16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmulh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmulh.u32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmulh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmulh.u8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmulh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmulh.u16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmulh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmulh.u32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullb.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullb.u8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullb.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullb.u16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullb.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullb.u32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullt.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullt.u8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullt.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullt.u16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vmullt.s32 q0, q2, q1 +# CHECK-NEXT: - - - 
2.00 - vmullt.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmullb.p8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmullb.p16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmullt.p8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmullt.p16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vmvn.i16 q0, #0xa +# CHECK-NEXT: - - 2.00 - - vmvn.i32 q0, #0xa +# CHECK-NEXT: - - 2.00 - - vmvn q0, q2 +# CHECK-NEXT: - - 2.00 - - vneg.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vneg.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vneg.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vorn q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorr.i16 q0, #0xa +# CHECK-NEXT: - - 2.00 - - vorr.i32 q0, #0xa +# CHECK-NEXT: - - 2.00 - - vorr q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vpsel q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqabs.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqabs.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqabs.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqadd.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqadd.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqadd.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqadd.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqadd.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqadd.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqadd.s8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqadd.u8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqadd.s16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqadd.u16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqadd.s32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqadd.u32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmladh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmladh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmladh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmladhx.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmladhx.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmladhx.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmladh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmladh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmladh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmladhx.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmladhx.s16 
q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmladhx.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmlah.s8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmlah.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmlah.s32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmlah.s8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmlah.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmlah.s32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmlash.s8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmlash.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmlash.s32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmlash.s8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmlash.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmlash.s32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmlsdh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmlsdh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmlsdh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmlsdhx.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmlsdhx.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmlsdhx.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmlsdh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmlsdh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmlsdh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmlsdhx.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmlsdhx.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmlsdhx.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmulh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmulh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmulh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmulh.s8 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmulh.s16 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqrdmulh.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmulh.s8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmulh.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmulh.s32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmulh.s8 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmulh.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqrdmulh.s32 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmullt.s16 q0, q2, q1 +# CHECK-NEXT: - 
- - 2.00 - vqdmullt.s32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vqdmullb.s16 q0, q2, r0 +# CHECK-NEXT: - - - 2.00 - vqdmullb.s32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqmovnt.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnt.u16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnt.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnt.u32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnb.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnb.u16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnb.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovnb.u32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovunt.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovunt.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovunb.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqmovunb.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqneg.s8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqneg.s16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqneg.s32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vqrshl.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqrshl.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqrshl.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqrshl.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqrshl.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqrshl.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqrshl.s8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqrshl.u8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqrshl.s16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqrshl.u16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqrshl.s32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqrshl.u32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqrshrnb.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnb.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnb.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnb.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnt.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnt.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnt.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrnt.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrunb.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrunb.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqrshrunt.s16 q0, q2, #5 +# 
CHECK-NEXT: - - 2.00 - - vqrshrunt.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.s8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqshl.u8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqshl.s16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqshl.u16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqshl.s32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqshl.u32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vqshl.s8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.u8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshlu.s8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshlu.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshlu.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshl.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqshl.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqshl.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqshl.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqshl.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqshl.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqshrnb.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnb.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnb.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnb.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnt.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnt.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnt.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrnt.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrunb.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrunb.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrunt.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqshrunt.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vqsub.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqsub.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqsub.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqsub.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqsub.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqsub.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vqsub.s8 q0, 
q2, r0 +# CHECK-NEXT: - - 2.00 - - vqsub.u8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqsub.s16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqsub.u16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqsub.s32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vqsub.u32 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vrev16.8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vrev32.8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vrev32.16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vrev64.8 q0, q2 +# CHECK-NEXT: - - 2.00 - - vrev64.16 q0, q2 +# CHECK-NEXT: - - 2.00 - - vrev64.32 q0, q2 +# CHECK-NEXT: - - 2.00 - - vrhadd.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrhadd.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrhadd.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrhadd.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrhadd.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrhadd.u32 q0, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlalvh.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlalvh.u32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlalvha.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlalvha.u32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlaldavhx.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlaldavhax.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlsldavh.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlsldavha.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlsldavhx.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - - 2.00 - vrmlsldavhax.s32 r0, r1, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vrshl.s8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vrshl.u8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vrshl.s16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vrshl.u16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vrshl.s32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vrshl.u32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vrshr.s8 q0, 
q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshr.u8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshr.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshr.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshr.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshr.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshrnb.i16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshrnb.i32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshrnt.i16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vrshrnt.i32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsbc.i32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vsbci.i32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshl.i8 q0, q2, #1 +# CHECK-NEXT: - - 2.00 - - vshl.i16 q0, q2, #1 +# CHECK-NEXT: - - 2.00 - - vshl.i32 q0, q2, #1 +# CHECK-NEXT: - - 2.00 - - vshl.s8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vshl.u8 q0, r0 +# CHECK-NEXT: - - 2.00 - - vshl.s16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vshl.u16 q0, r0 +# CHECK-NEXT: - - 2.00 - - vshl.s32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vshl.u32 q0, r0 +# CHECK-NEXT: - - 2.00 - - vshl.s8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshl.u8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshl.s16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshl.u16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshl.s32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshl.u32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vshlc q0, r0, #5 +# CHECK-NEXT: - - 2.00 - - vshllt.s8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllt.u8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllt.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllt.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllb.s8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllb.u8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllb.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllb.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshllt.s8 q0, q2, #8 +# CHECK-NEXT: - - 2.00 - - vshllt.u8 q0, q2, #8 +# CHECK-NEXT: - - 2.00 - - vshllt.s16 q0, q2, #16 +# CHECK-NEXT: - - 2.00 - - vshllt.u16 q0, q2, #16 +# CHECK-NEXT: - - 2.00 - - vshllb.s8 q0, q2, #8 +# CHECK-NEXT: - - 2.00 - - vshllb.u8 q0, q2, #8 +# CHECK-NEXT: - 
- 2.00 - - vshllb.s16 q0, q2, #16 +# CHECK-NEXT: - - 2.00 - - vshllb.u16 q0, q2, #16 +# CHECK-NEXT: - - 2.00 - - vshr.s8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshr.u8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshr.s16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshr.u16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshr.s32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshr.u32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshrnb.i16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshrnb.i32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshrnt.i16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vshrnt.i32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsli.8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsli.16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsli.32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsri.8 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsri.16 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsri.32 q0, q2, #5 +# CHECK-NEXT: - - 2.00 - - vsub.i8 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vsub.i16 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vsub.i32 q0, q2, q1 +# CHECK-NEXT: - - 2.00 - - vsub.i8 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vsub.i16 q0, q2, r0 +# CHECK-NEXT: - - 2.00 - - vsub.i32 q0, q2, r0 diff --git a/llvm/test/tools/llvm-mca/ARM/m55-mve-ldst.s b/llvm/test/tools/llvm-mca/ARM/m55-mve-ldst.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-mve-ldst.s @@ -0,0 +1,323 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -instruction-tables < %s | FileCheck %s + +vldrb.8 q1, [r0, 0] +vldrb.8 q1, [r0, 0]! +vldrb.8 q1, [r0], 0 +vldrh.16 q1, [r0, 0] +vldrh.16 q1, [r0, 0]! +vldrh.16 q1, [r0], 0 +vldrw.32 q1, [r0, 0] +vldrw.32 q1, [r0, 0]! +vldrw.32 q1, [r0], 0 + +vldrb.u16 q1, [r0, 0] +vldrb.u16 q1, [r0, 0]! +vldrb.u16 q1, [r0], 0 +vldrb.u32 q1, [r0, 0] +vldrb.u32 q1, [r0, 0]! +vldrb.u32 q1, [r0], 0 +vldrh.u32 q1, [r0, 0] +vldrh.u32 q1, [r0, 0]! +vldrh.u32 q1, [r0], 0 + +vldrb.s16 q1, [r0, 4] +vldrb.s16 q1, [r0, 4]! 
+vldrb.s16 q1, [r0], 4 +vldrb.s32 q1, [r0, 4] +vldrb.s32 q1, [r0, 4]! +vldrb.s32 q1, [r0], 4 +vldrh.s32 q1, [r0, 4] +vldrh.s32 q1, [r0, 4]! +vldrh.s32 q1, [r0], 4 + +vldrw.32 q1, [r0, q0] +vldrh.16 q1, [r0, q0] +vldrb.8 q1, [r0, q0] +vldrb.u16 q1, [r0, q0] +vldrb.u32 q1, [r0, q0] +vldrh.u32 q1, [r0, q0] +vldrb.s16 q1, [r0, q0] +vldrb.s32 q1, [r0, q0] +vldrh.s32 q1, [r0, q0] +vldrw.32 q1, [r0, q0, uxtw #2] +vldrh.16 q1, [r0, q0, uxtw #1] +vldrh.u32 q1, [r0, q0, uxtw #1] +vldrh.s32 q1, [r0, q0, uxtw #1] + +vldrw.32 q1, [q0, 4] +vldrw.32 q1, [q0, 4]! + +vld20.8 {q0, q1}, [r0] +vld21.8 {q0, q1}, [r0]! +vld40.8 {q0, q1, q2, q3}, [r0] +vld43.8 {q0, q1, q2, q3}, [r0]! +vld20.16 {q0, q1}, [r0] +vld21.16 {q0, q1}, [r0]! +vld40.16 {q0, q1, q2, q3}, [r0] +vld43.16 {q0, q1, q2, q3}, [r0]! +vld20.32 {q0, q1}, [r0] +vld21.32 {q0, q1}, [r0]! +vld40.32 {q0, q1, q2, q3}, [r0] +vld43.32 {q0, q1, q2, q3}, [r0]! + +vstrb.8 q1, [r0, 0] +vstrb.8 q1, [r0, 0]! +vstrb.8 q1, [r0], 0 +vstrh.16 q1, [r0, 0] +vstrh.16 q1, [r0, 0]! +vstrh.16 q1, [r0], 0 +vstrw.32 q1, [r0, 0] +vstrw.32 q1, [r0, 0]! +vstrw.32 q1, [r0], 0 + +vstrb.16 q1, [r0, 0] +vstrb.16 q1, [r0, 0]! +vstrb.16 q1, [r0], 0 +vstrb.32 q1, [r0, 0] +vstrb.32 q1, [r0, 0]! +vstrb.32 q1, [r0], 0 +vstrh.32 q1, [r0, 0] +vstrh.32 q1, [r0, 0]! +vstrh.32 q1, [r0], 0 + +vstrw.32 q1, [r0, q0] +vstrh.16 q1, [r0, q0] +vstrb.8 q1, [r0, q0] +vstrb.16 q1, [r0, q0] +vstrb.32 q1, [r0, q0] +vstrh.32 q1, [r0, q0] + +vstrw.32 q1, [r0, q0, uxtw #2] +vstrh.16 q1, [r0, q0, uxtw #1] +vstrh.32 q1, [r0, q0, uxtw #1] + +vstrw.32 q1, [q0, 4] +vstrw.32 q1, [q0, 4]! + +vst20.8 {q0, q1}, [r0] +vst21.8 {q0, q1}, [r0]! +vst40.8 {q0, q1, q2, q3}, [r0] +vst43.8 {q0, q1, q2, q3}, [r0]! +vst20.16 {q0, q1}, [r0] +vst21.16 {q0, q1}, [r0]! +vst40.16 {q0, q1, q2, q3}, [r0] +vst43.16 {q0, q1, q2, q3}, [r0]! +vst20.32 {q0, q1}, [r0] +vst21.32 {q0, q1}, [r0]! +vst40.32 {q0, q1, q2, q3}, [r0] +vst43.32 {q0, q1, q2, q3}, [r0]! 
+ +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 2.00 * vldrb.u8 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vldrb.u8 q1, [r0, #0]! +# CHECK-NEXT: 1 1 2.00 * vldrb.u8 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vldrh.u16 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vldrh.u16 q1, [r0, #0]! +# CHECK-NEXT: 1 1 2.00 * vldrh.u16 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vldrw.u32 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vldrw.u32 q1, [r0, #0]! +# CHECK-NEXT: 1 1 2.00 * vldrw.u32 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vldrb.u16 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vldrb.u16 q1, [r0]! +# CHECK-NEXT: 1 1 2.00 * vldrb.u16 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vldrb.u32 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vldrb.u32 q1, [r0]! +# CHECK-NEXT: 1 1 2.00 * vldrb.u32 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vldrh.u32 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vldrh.u32 q1, [r0]! +# CHECK-NEXT: 1 1 2.00 * vldrh.u32 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vldrb.s16 q1, [r0, #4] +# CHECK-NEXT: 1 1 2.00 * vldrb.s16 q1, [r0, #4]! +# CHECK-NEXT: 1 1 2.00 * vldrb.s16 q1, [r0], #4 +# CHECK-NEXT: 1 1 2.00 * vldrb.s32 q1, [r0, #4] +# CHECK-NEXT: 1 1 2.00 * vldrb.s32 q1, [r0, #4]! +# CHECK-NEXT: 1 1 2.00 * vldrb.s32 q1, [r0], #4 +# CHECK-NEXT: 1 1 2.00 * vldrh.s32 q1, [r0, #4] +# CHECK-NEXT: 1 1 2.00 * vldrh.s32 q1, [r0, #4]! 
+# CHECK-NEXT: 1 1 2.00 * vldrh.s32 q1, [r0], #4 +# CHECK-NEXT: 1 6 2.00 * vldrw.u32 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrh.u16 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrb.u8 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrb.u16 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrb.u32 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrh.u32 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrb.s16 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrb.s32 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrh.s32 q1, [r0, q0] +# CHECK-NEXT: 1 6 2.00 * vldrw.u32 q1, [r0, q0, uxtw #2] +# CHECK-NEXT: 1 6 2.00 * vldrh.u16 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: 1 6 2.00 * vldrh.u32 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: 1 6 2.00 * vldrh.s32 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: 1 6 2.00 * vldrw.u32 q1, [q0, #4] +# CHECK-NEXT: 1 6 2.00 * vldrw.u32 q1, [q0, #4]! +# CHECK-NEXT: 1 1 2.00 * vld20.8 {q0, q1}, [r0] +# CHECK-NEXT: 1 1 2.00 * vld21.8 {q0, q1}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vld40.8 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: 1 1 2.00 * vld43.8 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vld20.16 {q0, q1}, [r0] +# CHECK-NEXT: 1 1 2.00 * vld21.16 {q0, q1}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vld40.16 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: 1 1 2.00 * vld43.16 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vld20.32 {q0, q1}, [r0] +# CHECK-NEXT: 1 1 2.00 * vld21.32 {q0, q1}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vld40.32 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: 1 1 2.00 * vld43.32 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q1, [r0, #0]! +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vstrh.16 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vstrh.16 q1, [r0, #0]! +# CHECK-NEXT: 1 1 2.00 * vstrh.16 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vstrw.32 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vstrw.32 q1, [r0, #0]! +# CHECK-NEXT: 1 1 2.00 * vstrw.32 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vstrb.16 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vstrb.16 q1, [r0]! 
+# CHECK-NEXT: 1 1 2.00 * vstrb.16 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vstrb.32 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vstrb.32 q1, [r0]! +# CHECK-NEXT: 1 1 2.00 * vstrb.32 q1, [r0], #0 +# CHECK-NEXT: 1 1 2.00 * vstrh.32 q1, [r0] +# CHECK-NEXT: 1 1 2.00 * vstrh.32 q1, [r0]! +# CHECK-NEXT: 1 1 2.00 * vstrh.32 q1, [r0], #0 +# CHECK-NEXT: 1 5 2.00 * vstrw.32 q1, [r0, q0] +# CHECK-NEXT: 1 5 2.00 * vstrh.16 q1, [r0, q0] +# CHECK-NEXT: 1 5 2.00 * vstrb.8 q1, [r0, q0] +# CHECK-NEXT: 1 5 2.00 * vstrb.16 q1, [r0, q0] +# CHECK-NEXT: 1 5 2.00 * vstrb.32 q1, [r0, q0] +# CHECK-NEXT: 1 5 2.00 * vstrh.32 q1, [r0, q0] +# CHECK-NEXT: 1 5 2.00 * vstrw.32 q1, [r0, q0, uxtw #2] +# CHECK-NEXT: 1 5 2.00 * vstrh.16 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: 1 5 2.00 * vstrh.32 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: 1 5 2.00 * vstrw.32 q1, [q0, #4] +# CHECK-NEXT: 1 5 2.00 * vstrw.32 q1, [q0, #4]! +# CHECK-NEXT: 1 1 2.00 * vst20.8 {q0, q1}, [r0] +# CHECK-NEXT: 1 1 2.00 * vst21.8 {q0, q1}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vst40.8 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: 1 1 2.00 * vst43.8 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vst20.16 {q0, q1}, [r0] +# CHECK-NEXT: 1 1 2.00 * vst21.16 {q0, q1}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vst40.16 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: 1 1 2.00 * vst43.16 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vst20.32 {q0, q1}, [r0] +# CHECK-NEXT: 1 1 2.00 * vst21.32 {q0, q1}, [r0]! +# CHECK-NEXT: 1 1 2.00 * vst40.32 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: 1 1 2.00 * vst43.32 {q0, q1, q2, q3}, [r0]! 
+ +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: - 190.00 - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: - 2.00 - - - vldrb.u8 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vldrb.u8 q1, [r0, #0]! +# CHECK-NEXT: - 2.00 - - - vldrb.u8 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vldrh.u16 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vldrh.u16 q1, [r0, #0]! +# CHECK-NEXT: - 2.00 - - - vldrh.u16 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [r0, #0]! +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vldrb.u16 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vldrb.u16 q1, [r0]! +# CHECK-NEXT: - 2.00 - - - vldrb.u16 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vldrb.u32 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vldrb.u32 q1, [r0]! +# CHECK-NEXT: - 2.00 - - - vldrb.u32 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vldrh.u32 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vldrh.u32 q1, [r0]! +# CHECK-NEXT: - 2.00 - - - vldrh.u32 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vldrb.s16 q1, [r0, #4] +# CHECK-NEXT: - 2.00 - - - vldrb.s16 q1, [r0, #4]! +# CHECK-NEXT: - 2.00 - - - vldrb.s16 q1, [r0], #4 +# CHECK-NEXT: - 2.00 - - - vldrb.s32 q1, [r0, #4] +# CHECK-NEXT: - 2.00 - - - vldrb.s32 q1, [r0, #4]! +# CHECK-NEXT: - 2.00 - - - vldrb.s32 q1, [r0], #4 +# CHECK-NEXT: - 2.00 - - - vldrh.s32 q1, [r0, #4] +# CHECK-NEXT: - 2.00 - - - vldrh.s32 q1, [r0, #4]! 
+# CHECK-NEXT: - 2.00 - - - vldrh.s32 q1, [r0], #4 +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrh.u16 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrb.u8 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrb.u16 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrb.u32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrh.u32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrb.s16 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrb.s32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrh.s32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [r0, q0, uxtw #2] +# CHECK-NEXT: - 2.00 - - - vldrh.u16 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: - 2.00 - - - vldrh.u32 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: - 2.00 - - - vldrh.s32 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [q0, #4] +# CHECK-NEXT: - 2.00 - - - vldrw.u32 q1, [q0, #4]! +# CHECK-NEXT: - 2.00 - - - vld20.8 {q0, q1}, [r0] +# CHECK-NEXT: - 2.00 - - - vld21.8 {q0, q1}, [r0]! +# CHECK-NEXT: - 2.00 - - - vld40.8 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: - 2.00 - - - vld43.8 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: - 2.00 - - - vld20.16 {q0, q1}, [r0] +# CHECK-NEXT: - 2.00 - - - vld21.16 {q0, q1}, [r0]! +# CHECK-NEXT: - 2.00 - - - vld40.16 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: - 2.00 - - - vld43.16 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: - 2.00 - - - vld20.32 {q0, q1}, [r0] +# CHECK-NEXT: - 2.00 - - - vld21.32 {q0, q1}, [r0]! +# CHECK-NEXT: - 2.00 - - - vld40.32 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: - 2.00 - - - vld43.32 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: - 2.00 - - - vstrb.8 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vstrb.8 q1, [r0, #0]! +# CHECK-NEXT: - 2.00 - - - vstrb.8 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vstrh.16 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vstrh.16 q1, [r0, #0]! +# CHECK-NEXT: - 2.00 - - - vstrh.16 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [r0, #0]! 
+# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vstrb.16 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vstrb.16 q1, [r0]! +# CHECK-NEXT: - 2.00 - - - vstrb.16 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vstrb.32 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vstrb.32 q1, [r0]! +# CHECK-NEXT: - 2.00 - - - vstrb.32 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vstrh.32 q1, [r0] +# CHECK-NEXT: - 2.00 - - - vstrh.32 q1, [r0]! +# CHECK-NEXT: - 2.00 - - - vstrh.32 q1, [r0], #0 +# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vstrh.16 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vstrb.8 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vstrb.16 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vstrb.32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vstrh.32 q1, [r0, q0] +# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [r0, q0, uxtw #2] +# CHECK-NEXT: - 2.00 - - - vstrh.16 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: - 2.00 - - - vstrh.32 q1, [r0, q0, uxtw #1] +# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [q0, #4] +# CHECK-NEXT: - 2.00 - - - vstrw.32 q1, [q0, #4]! +# CHECK-NEXT: - 2.00 - - - vst20.8 {q0, q1}, [r0] +# CHECK-NEXT: - 2.00 - - - vst21.8 {q0, q1}, [r0]! +# CHECK-NEXT: - 2.00 - - - vst40.8 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: - 2.00 - - - vst43.8 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: - 2.00 - - - vst20.16 {q0, q1}, [r0] +# CHECK-NEXT: - 2.00 - - - vst21.16 {q0, q1}, [r0]! +# CHECK-NEXT: - 2.00 - - - vst40.16 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: - 2.00 - - - vst43.16 {q0, q1, q2, q3}, [r0]! +# CHECK-NEXT: - 2.00 - - - vst20.32 {q0, q1}, [r0] +# CHECK-NEXT: - 2.00 - - - vst21.32 {q0, q1}, [r0]! +# CHECK-NEXT: - 2.00 - - - vst40.32 {q0, q1, q2, q3}, [r0] +# CHECK-NEXT: - 2.00 - - - vst43.32 {q0, q1, q2, q3}, [r0]! 
diff --git a/llvm/test/tools/llvm-mca/ARM/m55-mve-pred.s b/llvm/test/tools/llvm-mca/ARM/m55-mve-pred.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-mve-pred.s @@ -0,0 +1,694 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -instruction-tables < %s | FileCheck %s + +vcmp.f16 eq, q2, q1 +vcmp.f32 eq, q2, q1 +vcmp.f16 ne, q2, q1 +vcmp.f32 ne, q2, q1 +vcmp.f16 ge, q2, q1 +vcmp.f32 ge, q2, q1 +vcmp.f16 lt, q2, q1 +vcmp.f32 lt, q2, q1 +vcmp.f16 gt, q2, q1 +vcmp.f32 gt, q2, q1 +vcmp.f16 le, q2, q1 +vcmp.f32 le, q2, q1 +vcmp.f16 eq, q2, r1 +vcmp.f32 eq, q2, r1 +vcmp.f16 ne, q2, r1 +vcmp.f32 ne, q2, r1 +vcmp.f16 ge, q2, r1 +vcmp.f32 ge, q2, r1 +vcmp.f16 lt, q2, r1 +vcmp.f32 lt, q2, r1 +vcmp.f16 gt, q2, r1 +vcmp.f32 gt, q2, r1 +vcmp.f16 le, q2, r1 +vcmp.f32 le, q2, r1 +vcmp.i8 eq, q2, q1 +vcmp.i16 eq, q2, q1 +vcmp.i32 eq, q2, q1 +vcmp.i8 ne, q2, q1 +vcmp.i16 ne, q2, q1 +vcmp.i32 ne, q2, q1 +vcmp.u8 cs, q2, q1 +vcmp.u16 cs, q2, q1 +vcmp.u32 cs, q2, q1 +vcmp.u8 hi, q2, q1 +vcmp.u16 hi, q2, q1 +vcmp.u32 hi, q2, q1 +vcmp.s8 ge, q2, q1 +vcmp.s16 ge, q2, q1 +vcmp.s32 ge, q2, q1 +vcmp.s8 lt, q2, q1 +vcmp.s16 lt, q2, q1 +vcmp.s32 lt, q2, q1 +vcmp.s8 gt, q2, q1 +vcmp.s16 gt, q2, q1 +vcmp.s32 gt, q2, q1 +vcmp.s8 le, q2, q1 +vcmp.s16 le, q2, q1 +vcmp.s32 le, q2, q1 +vcmp.i8 eq, q2, r1 +vcmp.i16 eq, q2, r1 +vcmp.i32 eq, q2, r1 +vcmp.i8 ne, q2, r1 +vcmp.i16 ne, q2, r1 +vcmp.i32 ne, q2, r1 +vcmp.u8 cs, q2, r1 +vcmp.u16 cs, q2, r1 +vcmp.u32 cs, q2, r1 +vcmp.u8 hi, q2, r1 +vcmp.u16 hi, q2, r1 +vcmp.u32 hi, q2, r1 +vcmp.s8 ge, q2, r1 +vcmp.s16 ge, q2, r1 +vcmp.s32 ge, q2, r1 +vcmp.s8 lt, q2, r1 +vcmp.s16 lt, q2, r1 +vcmp.s32 lt, q2, r1 +vcmp.s8 gt, q2, r1 +vcmp.s16 gt, q2, r1 +vcmp.s32 gt, q2, r1 +vcmp.s8 le, q2, r1 +vcmp.s16 le, q2, r1 +vcmp.s32 le, q2, r1 +vctp.8 r0 +vctp.16 r0 +vctp.32 r0 +vctp.64 r0 +#vpnot FIXME: crashes compiler +vpst +vorrt q0, q0, 
q0 +vpt.f16 eq, q2, q1 +vorrt q0, q1, q2 +vpt.f32 eq, q2, q1 +vorrt q0, q1, q2 +vpt.f16 ne, q2, q1 +vorrt q0, q1, q2 +vpt.f32 ne, q2, q1 +vorrt q0, q1, q2 +vpt.f16 ge, q2, q1 +vorrt q0, q1, q2 +vpt.f32 ge, q2, q1 +vorrt q0, q1, q2 +vpt.f16 lt, q2, q1 +vorrt q0, q1, q2 +vpt.f32 lt, q2, q1 +vorrt q0, q1, q2 +vpt.f16 gt, q2, q1 +vorrt q0, q1, q2 +vpt.f32 gt, q2, q1 +vorrt q0, q1, q2 +vpt.f16 le, q2, q1 +vorrt q0, q1, q2 +vpt.f32 le, q2, q1 +vorrt q0, q1, q2 +vpt.f16 eq, q2, r1 +vorrt q0, q1, q2 +vpt.f32 eq, q2, r1 +vorrt q0, q1, q2 +vpt.f16 ne, q2, r1 +vorrt q0, q1, q2 +vpt.f32 ne, q2, r1 +vorrt q0, q1, q2 +vpt.f16 ge, q2, r1 +vorrt q0, q1, q2 +vpt.f32 ge, q2, r1 +vorrt q0, q1, q2 +vpt.f16 lt, q2, r1 +vorrt q0, q1, q2 +vpt.f32 lt, q2, r1 +vorrt q0, q1, q2 +vpt.f16 gt, q2, r1 +vorrt q0, q1, q2 +vpt.f32 gt, q2, r1 +vorrt q0, q1, q2 +vpt.f16 le, q2, r1 +vorrt q0, q1, q2 +vpt.f32 le, q2, r1 +vorrt q0, q1, q2 +vpt.i8 eq, q2, q1 +vorrt q0, q1, q2 +vpt.i16 eq, q2, q1 +vorrt q0, q1, q2 +vpt.i32 eq, q2, q1 +vorrt q0, q1, q2 +vpt.i8 ne, q2, q1 +vorrt q0, q1, q2 +vpt.i16 ne, q2, q1 +vorrt q0, q1, q2 +vpt.i32 ne, q2, q1 +vorrt q0, q1, q2 +vpt.u8 cs, q2, q1 +vorrt q0, q1, q2 +vpt.u16 cs, q2, q1 +vorrt q0, q1, q2 +vpt.u32 cs, q2, q1 +vorrt q0, q1, q2 +vpt.u8 hi, q2, q1 +vorrt q0, q1, q2 +vpt.u16 hi, q2, q1 +vorrt q0, q1, q2 +vpt.u32 hi, q2, q1 +vorrt q0, q1, q2 +vpt.s8 ge, q2, q1 +vorrt q0, q1, q2 +vpt.s16 ge, q2, q1 +vorrt q0, q1, q2 +vpt.s32 ge, q2, q1 +vorrt q0, q1, q2 +vpt.s8 lt, q2, q1 +vorrt q0, q1, q2 +vpt.s16 lt, q2, q1 +vorrt q0, q1, q2 +vpt.s32 lt, q2, q1 +vorrt q0, q1, q2 +vpt.s8 gt, q2, q1 +vorrt q0, q1, q2 +vpt.s16 gt, q2, q1 +vorrt q0, q1, q2 +vpt.s32 gt, q2, q1 +vorrt q0, q1, q2 +vpt.s8 le, q2, q1 +vorrt q0, q1, q2 +vpt.s16 le, q2, q1 +vorrt q0, q1, q2 +vpt.s32 le, q2, q1 +vorrt q0, q1, q2 +vpt.i8 eq, q2, r1 +vorrt q0, q1, q2 +vpt.i16 eq, q2, r1 +vorrt q0, q1, q2 +vpt.i32 eq, q2, r1 +vorrt q0, q1, q2 +vpt.i8 ne, q2, r1 +vorrt q0, q1, q2 +vpt.i16 ne, q2, r1 +vorrt q0, 
q1, q2 +vpt.i32 ne, q2, r1 +vorrt q0, q1, q2 +vpt.u8 cs, q2, r1 +vorrt q0, q1, q2 +vpt.u16 cs, q2, r1 +vorrt q0, q1, q2 +vpt.u32 cs, q2, r1 +vorrt q0, q1, q2 +vpt.u8 hi, q2, r1 +vorrt q0, q1, q2 +vpt.u16 hi, q2, r1 +vorrt q0, q1, q2 +vpt.u32 hi, q2, r1 +vorrt q0, q1, q2 +vpt.s8 ge, q2, r1 +vorrt q0, q1, q2 +vpt.s16 ge, q2, r1 +vorrt q0, q1, q2 +vpt.s32 ge, q2, r1 +vorrt q0, q1, q2 +vpt.s8 lt, q2, r1 +vorrt q0, q1, q2 +vpt.s16 lt, q2, r1 +vorrt q0, q1, q2 +vpt.s32 lt, q2, r1 +vorrt q0, q1, q2 +vpt.s8 gt, q2, r1 +vorrt q0, q1, q2 +vpt.s16 gt, q2, r1 +vorrt q0, q1, q2 +vpt.s32 gt, q2, r1 +vorrt q0, q1, q2 +vpt.s8 le, q2, r1 +vorrt q0, q1, q2 +vpt.s16 le, q2, r1 +vorrt q0, q1, q2 +vpt.s32 le, q2, r1 +vorrt q0, q1, q2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 2.00 vcmp.f16 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f32 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.f16 le, q2, r1 +# 
CHECK-NEXT: 1 1 2.00 vcmp.f32 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.i8 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.i16 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.i32 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.i8 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.i16 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.i32 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.u8 cs, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.u16 cs, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.u32 cs, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.u8 hi, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.u16 hi, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.u32 hi, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vcmp.i8 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.i16 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.i32 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.i8 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.i16 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.i32 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.u8 cs, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.u16 cs, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.u32 cs, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.u8 hi, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.u16 hi, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.u32 hi, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 gt, 
q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s8 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s16 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vcmp.s32 le, q2, r1 +# CHECK-NEXT: 1 1 1.00 vctp.8 r0 +# CHECK-NEXT: 1 1 1.00 vctp.16 r0 +# CHECK-NEXT: 1 1 1.00 vctp.32 r0 +# CHECK-NEXT: 1 1 1.00 vctp.64 r0 +# CHECK-NEXT: 1 1 1.00 U vpst +# CHECK-NEXT: 1 1 2.00 vmovt q0, q0 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 
2.00 U vpt.f32 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f16 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.f32 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i8 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i16 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i32 eq, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i8 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i16 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i32 ne, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u8 cs, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u16 cs, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u32 cs, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u8 hi, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u16 hi, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u32 hi, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 ge, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 lt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 gt, q2, q1 +# 
CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 gt, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 le, q2, q1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i8 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i16 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i32 eq, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i8 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i16 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.i32 ne, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u8 cs, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u16 cs, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u32 cs, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u8 hi, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u16 hi, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.u32 hi, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 ge, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 lt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt 
q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 gt, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s8 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s16 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 +# CHECK-NEXT: 1 1 2.00 U vpt.s32 le, q2, r1 +# CHECK-NEXT: 1 1 2.00 vorrt q0, q1, q2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: - - 146.00 288.00 5.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: - - - 2.00 - vcmp.f16 eq, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 eq, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 ne, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 ne, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 ge, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 ge, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 lt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 lt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 gt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 gt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 le, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 le, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 eq, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 eq, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 ne, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 ne, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 ge, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 ge, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 lt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 lt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f16 gt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 gt, q2, r1 +# 
CHECK-NEXT: - - - 2.00 - vcmp.f16 le, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.f32 le, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.i8 eq, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.i16 eq, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.i32 eq, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.i8 ne, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.i16 ne, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.i32 ne, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.u8 cs, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.u16 cs, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.u32 cs, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.u8 hi, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.u16 hi, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.u32 hi, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 ge, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 ge, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 ge, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 lt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 lt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 lt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 gt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 gt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 gt, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 le, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 le, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 le, q2, q1 +# CHECK-NEXT: - - - 2.00 - vcmp.i8 eq, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.i16 eq, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.i32 eq, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.i8 ne, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.i16 ne, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.i32 ne, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.u8 cs, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.u16 cs, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.u32 cs, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.u8 hi, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.u16 hi, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.u32 hi, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 ge, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 ge, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 ge, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 lt, q2, r1 
+# CHECK-NEXT: - - - 2.00 - vcmp.s16 lt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 lt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 gt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 gt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 gt, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s8 le, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s16 le, q2, r1 +# CHECK-NEXT: - - - 2.00 - vcmp.s32 le, q2, r1 +# CHECK-NEXT: - - - - 1.00 vctp.8 r0 +# CHECK-NEXT: - - - - 1.00 vctp.16 r0 +# CHECK-NEXT: - - - - 1.00 vctp.32 r0 +# CHECK-NEXT: - - - - 1.00 vctp.64 r0 +# CHECK-NEXT: - - - - 1.00 vpst +# CHECK-NEXT: - - 2.00 - - vmovt q0, q0 +# CHECK-NEXT: - - - 2.00 - vpt.f16 eq, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 eq, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 ne, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 ne, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 ge, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 ge, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 lt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 lt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 gt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 gt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 le, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 le, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 eq, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 eq, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 ne, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# 
CHECK-NEXT: - - - 2.00 - vpt.f32 ne, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 ge, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 ge, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 lt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 lt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 gt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 gt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f16 le, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.f32 le, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i8 eq, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i16 eq, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i32 eq, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i8 ne, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i16 ne, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i32 ne, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u8 cs, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u16 cs, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u32 cs, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u8 hi, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u16 hi, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u32 hi, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 ge, q2, q1 +# CHECK-NEXT: - - 2.00 - - 
vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 ge, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 ge, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 lt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 lt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 lt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 gt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 gt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 gt, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 le, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 le, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 le, q2, q1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i8 eq, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i16 eq, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i32 eq, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i8 ne, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i16 ne, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.i32 ne, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u8 cs, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u16 cs, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u32 cs, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u8 hi, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u16 hi, q2, r1 +# 
CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.u32 hi, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 ge, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 ge, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 ge, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 lt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 lt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 lt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 gt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 gt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 gt, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s8 le, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s16 le, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 +# CHECK-NEXT: - - - 2.00 - vpt.s32 le, q2, r1 +# CHECK-NEXT: - - 2.00 - - vorrt q0, q1, q2 diff --git a/llvm/test/tools/llvm-mca/ARM/m55-storefwd.s b/llvm/test/tools/llvm-mca/ARM/m55-storefwd.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/ARM/m55-storefwd.s @@ -0,0 +1,269 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=thumbv8.1-m.main-none-none-eabi -mcpu=cortex-m55 -timeline < %s | FileCheck %s + +# Most MVE operations are either latency=1 or can forward into stores +vadd.i8 q0, q2, q1 +vstrb.8 q0, [r0, #0] +vadd.f32 q0, q2, q1 +vstrb.8 q0, [r0, #0] +vmul.i8 q0, q2, q1 +vstrb.8 q0, [r0, #0] +vmlas.u32 q0, q2, r0 +vstrb.8 q0, [r0, #0] +vfma.f16 q0, q2, q1 +vstrb.8 q0, [r0, #0] +vmullb.s16 q0, q2, q1 +vstrb.8 q0, [r0, #0] +vcvtt.f32.f16 q0, q2 +vstrb.8 q0, [r0, #0] 
+vcvtb.f32.f16 q0, q2 +vstrb.8 q0, [r0, #0] + +# The ones that cannot are VCVT.f16.f32 t/b and any VMOVN/VQMOVN/VSHRN/VQSHRN/VRSHRN +vmovnt.s16 q0, q2 +vstrb.8 q0, [r0, #0] +vmovnb.u32 q0, q2 +vstrb.8 q0, [r0, #0] +vqmovnt.s32 q0, q2 +vstrb.8 q0, [r0, #0] +vqmovnb.u16 q0, q2 +vstrb.8 q0, [r0, #0] +vshrnt.s32 q0, q2, #1 +vstrb.8 q0, [r0, #0] +vshrnb.u16 q0, q2, #1 +vstrb.8 q0, [r0, #0] +vqshrnt.s16 q0, q2, #1 +vstrb.8 q0, [r0, #0] +vqshrnb.u32 q0, q2, #1 +vstrb.8 q0, [r0, #0] +vrshrnt.s16 q0, q2, #1 +vstrb.8 q0, [r0, #0] +vrshrnb.u16 q0, q2, #1 +vstrb.8 q0, [r0, #0] +vcvtt.f16.f32 q0, q2 +vstrb.8 q0, [r0, #0] +vcvtb.f16.f32 q0, q2 +vstrb.8 q0, [r0, #0] + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 4000 +# CHECK-NEXT: Total Cycles: 6401 +# CHECK-NEXT: Total uOps: 4000 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.62 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 40.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 2.00 vadd.i8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 1 2.00 vadd.f32 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 2 2.00 vmul.i8 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 2 2.00 vmlas.i32 q0, q2, r0 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 2 2.00 vfma.f16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 2 2.00 vmullb.s16 q0, q2, q1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 2 2.00 vcvtt.f32.f16 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 2 2.00 vcvtb.f32.f16 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vmovnt.i16 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vmovnb.i32 
q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vqmovnt.s32 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vqmovnb.u16 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vshrnt.i32 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vshrnb.i16 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vqshrnt.s16 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vqshrnb.u32 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vrshrnt.i16 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vrshrnb.i16 q0, q2, #1 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vcvtt.f16.f32 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] +# CHECK-NEXT: 1 3 2.00 vcvtb.f16.f32 q0, q2 +# CHECK-NEXT: 1 1 2.00 * vstrb.8 q0, [r0] + +# CHECK: Resources: +# CHECK-NEXT: [0] - M55UnitALU +# CHECK-NEXT: [1] - M55UnitLoadStore +# CHECK-NEXT: [2] - M55UnitVecALU +# CHECK-NEXT: [3] - M55UnitVecFPALU +# CHECK-NEXT: [4] - M55UnitVecSys + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] +# CHECK-NEXT: - 40.00 22.00 18.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] Instructions: +# CHECK-NEXT: - - 2.00 - - vadd.i8 q0, q2, q1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vadd.f32 q0, q2, q1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vmul.i8 q0, q2, q1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vmlas.i32 q0, q2, r0 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vfma.f16 q0, q2, q1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vmullb.s16 q0, q2, q1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vcvtt.f32.f16 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, 
[r0] +# CHECK-NEXT: - - - 2.00 - vcvtb.f32.f16 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vmovnt.i16 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vmovnb.i32 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vqmovnt.s32 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vqmovnb.u16 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vshrnt.i32 q0, q2, #1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vshrnb.i16 q0, q2, #1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vqshrnt.s16 q0, q2, #1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vqshrnb.u32 q0, q2, #1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vrshrnt.i16 q0, q2, #1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - 2.00 - - vrshrnb.i16 q0, q2, #1 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vcvtt.f16.f32 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] +# CHECK-NEXT: - - - 2.00 - vcvtb.f16.f32 q0, q2 +# CHECK-NEXT: - 2.00 - - - vstrb.8 q0, [r0] + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + +# CHECK: [0,0] DE . . . . . . . . . . . . . . . . vadd.i8 q0, q2, q1 +# CHECK-NEXT: [0,1] .DE . . . . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,2] . DE . . . . . . . . . . . . . . . . vadd.f32 q0, q2, q1 +# CHECK-NEXT: [0,3] . DE. . . . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,4] . DeE . . . . . . . . . . . . . . . vmul.i8 q0, q2, q1 +# CHECK-NEXT: [0,5] . DE . . . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,6] . .DeE . . . . . . . . . . . . . . . vmlas.i32 q0, q2, r0 +# CHECK-NEXT: [0,7] . . DE . . . . . . . . . . . . . . . 
vstrb.8 q0, [r0] +# CHECK-NEXT: [0,8] . . DeE . . . . . . . . . . . . . . vfma.f16 q0, q2, q1 +# CHECK-NEXT: [0,9] . . DE . . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,10] . . DeE . . . . . . . . . . . . . . vmullb.s16 q0, q2, q1 +# CHECK-NEXT: [0,11] . . .DE . . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,12] . . . DeE. . . . . . . . . . . . . . vcvtt.f32.f16 q0, q2 +# CHECK-NEXT: [0,13] . . . DE. . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,14] . . . DeE . . . . . . . . . . . . . vcvtb.f32.f16 q0, q2 +# CHECK-NEXT: [0,15] . . . DE . . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,16] . . . .DeeE. . . . . . . . . . . . . vmovnt.i16 q0, q2 +# CHECK-NEXT: [0,17] . . . . DE . . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,18] . . . . DeeE . . . . . . . . . . . . vmovnb.i32 q0, q2 +# CHECK-NEXT: [0,19] . . . . . DE. . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,20] . . . . . DeeE . . . . . . . . . . . vqmovnt.s32 q0, q2 +# CHECK-NEXT: [0,21] . . . . . . DE . . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,22] . . . . . . DeeE . . . . . . . . . . vqmovnb.u16 q0, q2 +# CHECK-NEXT: [0,23] . . . . . . .DE . . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,24] . . . . . . . DeeE . . . . . . . . . vshrnt.i32 q0, q2, #1 +# CHECK-NEXT: [0,25] . . . . . . . DE . . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,26] . . . . . . . .DeeE. . . . . . . . . vshrnb.i16 q0, q2, #1 +# CHECK-NEXT: [0,27] . . . . . . . . DE . . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,28] . . . . . . . . DeeE . . . . . . . . vqshrnt.s16 q0, q2, #1 +# CHECK-NEXT: [0,29] . . . . . . . . . DE. . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,30] . . . . . . . . . DeeE . . . . . . . vqshrnb.u32 q0, q2, #1 +# CHECK-NEXT: [0,31] . . . . . . . . . . DE . . . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,32] . . . . . . . . . . DeeE . . . . . . vrshrnt.i16 q0, q2, #1 +# CHECK-NEXT: [0,33] . . . . . . . . . . .DE . . . 
. . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,34] . . . . . . . . . . . DeeE . . . . . vrshrnb.i16 q0, q2, #1 +# CHECK-NEXT: [0,35] . . . . . . . . . . . DE . . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,36] . . . . . . . . . . . .DeeE. . . . . vcvtt.f16.f32 q0, q2 +# CHECK-NEXT: [0,37] . . . . . . . . . . . . DE . . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [0,38] . . . . . . . . . . . . DeeE . . . . vcvtb.f16.f32 q0, q2 +# CHECK-NEXT: [0,39] . . . . . . . . . . . . . DE. . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,0] . . . . . . . . . . . . . DE . . . vadd.i8 q0, q2, q1 +# CHECK-NEXT: [1,1] . . . . . . . . . . . . . DE . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,2] . . . . . . . . . . . . . .DE . . . vadd.f32 q0, q2, q1 +# CHECK-NEXT: [1,3] . . . . . . . . . . . . . . DE . . . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,4] . . . . . . . . . . . . . . DeE . . vmul.i8 q0, q2, q1 +# CHECK-NEXT: [1,5] . . . . . . . . . . . . . . DE . . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,6] . . . . . . . . . . . . . . DeE . . vmlas.i32 q0, q2, r0 +# CHECK-NEXT: [1,7] . . . . . . . . . . . . . . .DE . . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,8] . . . . . . . . . . . . . . . DeE. . vfma.f16 q0, q2, q1 +# CHECK-NEXT: [1,9] . . . . . . . . . . . . . . . DE. . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,10] . . . . . . . . . . . . . . . DeE . vmullb.s16 q0, q2, q1 +# CHECK-NEXT: [1,11] . . . . . . . . . . . . . . . DE . vstrb.8 q0, [r0] +# CHECK-NEXT: [1,12] . . . . . . . . . . . . . . . .DeE. vcvtt.f32.f16 q0, q2 +# CHECK-NEXT: [1,13] . . . . . . . . . . . . . . . . DE. vstrb.8 q0, [r0] +# CHECK-NEXT: Truncated display due to cycle limit + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 
10 0.0 0.0 0.0 vadd.i8 q0, q2, q1 +# CHECK-NEXT: 1. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 2. 10 0.0 0.0 0.0 vadd.f32 q0, q2, q1 +# CHECK-NEXT: 3. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 4. 10 0.0 0.0 0.0 vmul.i8 q0, q2, q1 +# CHECK-NEXT: 5. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 6. 10 0.0 0.0 0.0 vmlas.i32 q0, q2, r0 +# CHECK-NEXT: 7. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 8. 10 0.0 0.0 0.0 vfma.f16 q0, q2, q1 +# CHECK-NEXT: 9. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 10. 10 0.0 0.0 0.0 vmullb.s16 q0, q2, q1 +# CHECK-NEXT: 11. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 12. 10 0.0 0.0 0.0 vcvtt.f32.f16 q0, q2 +# CHECK-NEXT: 13. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 14. 10 0.0 0.0 0.0 vcvtb.f32.f16 q0, q2 +# CHECK-NEXT: 15. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 16. 10 0.0 0.0 0.0 vmovnt.i16 q0, q2 +# CHECK-NEXT: 17. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 18. 10 0.0 0.0 0.0 vmovnb.i32 q0, q2 +# CHECK-NEXT: 19. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 20. 10 0.0 0.0 0.0 vqmovnt.s32 q0, q2 +# CHECK-NEXT: 21. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 22. 10 0.0 0.0 0.0 vqmovnb.u16 q0, q2 +# CHECK-NEXT: 23. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 24. 10 0.0 0.0 0.0 vshrnt.i32 q0, q2, #1 +# CHECK-NEXT: 25. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 26. 10 0.0 0.0 0.0 vshrnb.i16 q0, q2, #1 +# CHECK-NEXT: 27. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 28. 10 0.0 0.0 0.0 vqshrnt.s16 q0, q2, #1 +# CHECK-NEXT: 29. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 30. 10 0.0 0.0 0.0 vqshrnb.u32 q0, q2, #1 +# CHECK-NEXT: 31. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 32. 10 0.0 0.0 0.0 vrshrnt.i16 q0, q2, #1 +# CHECK-NEXT: 33. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 34. 10 0.0 0.0 0.0 vrshrnb.i16 q0, q2, #1 +# CHECK-NEXT: 35. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 36. 10 0.0 0.0 0.0 vcvtt.f16.f32 q0, q2 +# CHECK-NEXT: 37. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 38. 
10 0.0 0.0 0.0 vcvtb.f16.f32 q0, q2 +# CHECK-NEXT: 39. 10 0.0 0.0 0.0 vstrb.8 q0, [r0] +# CHECK-NEXT: 10 0.0 0.0 0.0 <total>