Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -748,6 +748,7 @@ include "AArch64SchedAmpere1.td" include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" +include "AArch64SchedNeoverseV1.td" include "AArch64SchedNeoverseV2.td" def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", @@ -1410,9 +1411,9 @@ ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; def : ProcessorModel<"neoverse-n2", NeoverseN2Model, ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; -def : ProcessorModel<"neoverse-512tvb", NeoverseN2Model, +def : ProcessorModel<"neoverse-512tvb", NeoverseV1Model, ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; -def : ProcessorModel<"neoverse-v1", NeoverseN2Model, +def : ProcessorModel<"neoverse-v1", NeoverseV1Model, ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; def : ProcessorModel<"neoverse-v2", NeoverseV2Model, ProcessorFeatures.NeoverseV2, [TuneNeoverseV2]>; Index: llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td =================================================================== --- /dev/null +++ llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -0,0 +1,1863 @@ +//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for the Arm Neoverse V1 processors. 
+// +// References: +// - "Arm Neoverse V1 Software Optimization Guide" +// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing" +// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm +// - "Neoverse V1" +// https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1 + +// +//===----------------------------------------------------------------------===// + +def NeoverseV1Model : SchedMachineModel { + let IssueWidth = 15; // Maximum micro-ops dispatch rate. + let MicroOpBufferSize = 256; // Micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 11; // Cycles cost of branch mispredicted. + let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. + let CompleteModel = 0; // ***FIXME*** + + list UnsupportedFeatures = !listconcat(SMEUnsupported.F, [HasMTE]); +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Neoverse V1. +// Instructions are first fetched and then decoded into internal macro-ops +// (MOPs). From there, the MOPs proceed through register renaming and dispatch +// stages. A MOP can be split into one or more micro-ops further down the +// pipeline, after the decode stage. Once dispatched, micro-ops wait for their +// operands and issue out-of-order to one of the issue pipelines. Each issue +// pipeline can accept one micro-op per cycle. + +let SchedModel = NeoverseV1Model in { + +// Define the issue ports. 
+def V1UnitB : ProcResource<2>; // Branch 0/1 +def V1UnitS : ProcResource<2>; // Integer single cycle 0/1 +def V1UnitM0 : ProcResource<1>; // Integer multicycle 0 +def V1UnitM1 : ProcResource<1>; // Integer multicycle 1 +def V1UnitL01 : ProcResource<2>; // Load/Store 0/1 +def V1UnitL2 : ProcResource<1>; // Load 2 +def V1UnitD : ProcResource<2>; // Store data 0/1 +def V1UnitV0 : ProcResource<1>; // FP/ASIMD 0 +def V1UnitV1 : ProcResource<1>; // FP/ASIMD 1 +def V1UnitV2 : ProcResource<1>; // FP/ASIMD 2 +def V1UnitV3 : ProcResource<1>; // FP/ASIMD 3 + +def V1UnitI : ProcResGroup<[V1UnitS, + V1UnitM0, V1UnitM1]>; // Integer units +def V1UnitJ : ProcResGroup<[V1UnitS, V1UnitM0]>; // Integer 0-2 units +def V1UnitM : ProcResGroup<[V1UnitM0, V1UnitM1]>; // Integer multicycle units +def V1UnitL : ProcResGroup<[V1UnitL01, V1UnitL2]>; // Load/Store units +def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1, + V1UnitV2, V1UnitV3]>; // FP/ASIMD units +def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units +def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units +def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units + +// Define commonly used read types. + +// No generic forwarding is provided for these types. 
+def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + + +//===----------------------------------------------------------------------===// +// Define generic 0 micro-op types + +let Latency = 0, NumMicroOps = 0 in +def V1Write_0c_0Z : SchedWriteRes<[]>; + + +//===----------------------------------------------------------------------===// +// Define generic 1 micro-op types + +def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; } +def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; } +def V1Write_1c_1J : SchedWriteRes<[V1UnitJ]> { let Latency = 1; } +def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; } +def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; } +def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; } +def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; } +def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; } +def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; } +def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; } +def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; } +def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; } +def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; } +def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; } +def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12; + let ResourceCycles = [5]; } +def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20; + let ResourceCycles = [5]; } +def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; } +def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; } +def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; } 
+def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; } +def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; } +def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; } +def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; } +def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; } +def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10; + let ResourceCycles = [7]; } +def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12; + let ResourceCycles = [7]; } +def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13; + let ResourceCycles = [10]; } +def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15; + let ResourceCycles = [7]; } +def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16; + let ResourceCycles = [7]; } +def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20; + let ResourceCycles = [7]; } +def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; } +def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; } +def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; } +def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; } +def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; } +def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; } +def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7; + let ResourceCycles = [7]; } +def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10; + let ResourceCycles = [7]; } +def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; + let ResourceCycles = [5]; } +def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; + let ResourceCycles = [11]; } +def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15; + let ResourceCycles = [7]; } +def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16; + let ResourceCycles = [7]; } +def 
V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; } +def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; } +def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; } +def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; } +def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; } + +//===----------------------------------------------------------------------===// +// Define generic 2 micro-op types + +let Latency = 1, NumMicroOps = 2 in +def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>; +let Latency = 3, NumMicroOps = 2 in +def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>; +let Latency = 5, NumMicroOps = 2 in +def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; +let Latency = 7, NumMicroOps = 2 in +def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>; +let Latency = 8, NumMicroOps = 2 in +def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; +let Latency = 9, NumMicroOps = 2 in +def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; +let Latency = 11, NumMicroOps = 2 in +def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; +let Latency = 1, NumMicroOps = 2 in +def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; +let Latency = 7, NumMicroOps = 2 in +def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; +let Latency = 2, NumMicroOps = 2 in +def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_1L01_1V : 
SchedWriteRes<[V1UnitL01, V1UnitV]>; +let Latency = 2, NumMicroOps = 2 in +def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; +let Latency = 2, NumMicroOps = 2 in +def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; +let Latency = 3, NumMicroOps = 2 in +def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; +let Latency = 9, NumMicroOps = 2 in +def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>; +let Latency = 5, NumMicroOps = 2 in +def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>; +let Latency = 7, NumMicroOps = 2 in +def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>; +let Latency = 5, NumMicroOps = 2 in +def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; +let Latency = 9, NumMicroOps = 2 in +def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; +let Latency = 5, NumMicroOps = 2 in +def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; +let Latency = 2, NumMicroOps = 2 in +def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; +let Latency = 6, NumMicroOps = 2 in +def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>; +let Latency = 4, NumMicroOps = 2 in +def V1Write_4c_2V13 : 
SchedWriteRes<[V1UnitV13, V1UnitV13]>; + +//===----------------------------------------------------------------------===// +// Define generic 3 micro-op types + +let Latency = 2, NumMicroOps = 3 in +def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>; +let Latency = 7, NumMicroOps = 3 in +def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>; +let Latency = 8, NumMicroOps = 3 in +def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>; +let Latency = 6, NumMicroOps = 3 in +def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>; +let Latency = 1, NumMicroOps = 3 in +def V1Write_1c_2L01_1D : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitD]>; +let Latency = 2, NumMicroOps = 3 in +def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; +let Latency = 4, NumMicroOps = 3 in +def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; +let Latency = 2, NumMicroOps = 3 in +def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>; +let Latency = 6, NumMicroOps = 3 in +def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 4, NumMicroOps = 3 in +def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 6, NumMicroOps = 3 in +def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 8, NumMicroOps = 3 in +def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; + +//===----------------------------------------------------------------------===// +// Define generic 4 micro-op types + +let Latency = 8, NumMicroOps = 4 in +def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0, + V1UnitV0, V1UnitV0]>; +let Latency = 7, NumMicroOps = 4 in +def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>; +let Latency = 8, NumMicroOps = 4 in +def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, + V1UnitV, V1UnitV]>; +let Latency = 9, NumMicroOps = 4 in +def 
V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, + V1UnitV, V1UnitV]>; +let Latency = 11, NumMicroOps = 4 in +def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, + V1UnitV, V1UnitV]>; +let Latency = 10, NumMicroOps = 4 in +def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitV, V1UnitV]>; +let Latency = 2, NumMicroOps = 4 in +def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01]>; +let Latency = 4, NumMicroOps = 4 in +def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01]>; +let Latency = 8, NumMicroOps = 4 in +def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01]>; +let Latency = 9, NumMicroOps = 4 in +def V1Write_9c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01]>; +let Latency = 10, NumMicroOps = 4 in +def V1Write_10c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01]>; +let Latency = 10, NumMicroOps = 4 in +def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, + V1UnitV1, V1UnitV1]>; +let Latency = 12, NumMicroOps = 4 in +def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, + V1UnitV1, V1UnitV1]>; +let Latency = 6, NumMicroOps = 4 in +def V1Write_6c_4V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0]>; +let Latency = 12, NumMicroOps = 4 in +def V1Write_12c_4V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01]>; +let Latency = 6, NumMicroOps = 4 in +def V1Write_6c_4V02 : SchedWriteRes<[V1UnitV02, V1UnitV02, V1UnitV02, V1UnitV02]>; + +//===----------------------------------------------------------------------===// +// Define generic 5 micro-op types + +let Latency = 8, NumMicroOps = 5 in +def V1Write_8c_2L_3V : SchedWriteRes<[V1UnitL, V1UnitL, + V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 14, NumMicroOps = 5 in +def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV, + V1UnitV0, + V1UnitV1, V1UnitV1, + V1UnitV13]>; +let Latency = 9, NumMicroOps = 5 in +def 
V1Write_9c_1V_4V01 : SchedWriteRes<[V1UnitV, + V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01]>; +let Latency = 6, NumMicroOps = 5 in +def V1Write_6c_5V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01, V1UnitV01]>; + +//===----------------------------------------------------------------------===// +// Define generic 6 micro-op types + +let Latency = 6, NumMicroOps = 6 in +def V1Write_6c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, + V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 8, NumMicroOps = 6 in +def V1Write_8c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, + V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 2, NumMicroOps = 6 in +def V1Write_2c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 5, NumMicroOps = 6 in +def V1Write_5c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 6, NumMicroOps = 6 in +def V1Write_6c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 11, NumMicroOps = 6 in +def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 11, NumMicroOps = 6 in +def V1Write_11c_1V_5V01 : SchedWriteRes<[V1UnitV, + V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01, V1UnitV01]>; +let Latency = 13, NumMicroOps = 6 in +def V1Write_13c_6V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01, V1UnitV01]>; + +//===----------------------------------------------------------------------===// +// Define generic 7 micro-op types + +let Latency = 8, NumMicroOps = 7 in +def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, + V1UnitV, V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 13, NumMicroOps = 7 in +def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitS, + V1UnitV01, V1UnitV01, V1UnitV01]>; + 
+//===----------------------------------------------------------------------===// +// Define generic 8 micro-op types + +let Latency = 9, NumMicroOps = 8 in +def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL, + V1UnitL, V1UnitL, + V1UnitV, V1UnitV, + V1UnitV, V1UnitV]>; +let Latency = 2, NumMicroOps = 8 in +def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01]>; +let Latency = 4, NumMicroOps = 8 in +def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01]>; +let Latency = 12, NumMicroOps = 8 in +def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01]>; + +//===----------------------------------------------------------------------===// +// Define generic 10 micro-op types + +let Latency = 13, NumMicroOps = 10 in +def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, + V1UnitS, V1UnitS, + V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01]>; +let Latency = 7, NumMicroOps = 10 in +def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV, V1UnitV, + V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 11, NumMicroOps = 10 in +def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0]>; + +//===----------------------------------------------------------------------===// +// Define generic 12 micro-op types + +let Latency = 7, NumMicroOps = 12 in +def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV01, V1UnitV01, V1UnitV01, + V1UnitV01, V1UnitV01, V1UnitV01]>; + +//===----------------------------------------------------------------------===// +// Define generic 15 micro-op types + +let Latency = 7, 
NumMicroOps = 15 in +def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitS, V1UnitS, + V1UnitS, V1UnitS, V1UnitS, + V1UnitV, V1UnitV, + V1UnitV, V1UnitV, V1UnitV]>; + + +//===----------------------------------------------------------------------===// +// Define generic 18 micro-op types + +let Latency = 11, NumMicroOps = 18 in +def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitV, V1UnitV, V1UnitV, + V1UnitV, V1UnitV, V1UnitV, + V1UnitV, V1UnitV, V1UnitV]>; +let Latency = 19, NumMicroOps = 18 in +def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0, + V1UnitV0, V1UnitV0, V1UnitV0]>; + +//===----------------------------------------------------------------------===// +// Define generic 27 micro-op types + +let Latency = 11, NumMicroOps = 27 in +def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitL01, V1UnitL01, V1UnitL01, + V1UnitS, V1UnitS, V1UnitS, + V1UnitS, V1UnitS, V1UnitS, + V1UnitS, V1UnitS, V1UnitS, + V1UnitV, V1UnitV, V1UnitV, + V1UnitV, V1UnitV, V1UnitV, + V1UnitV, V1UnitV, V1UnitV]>; + + +// Miscellaneous Instructions +// ----------------------------------------------------------------------------- + +// COPY +def : SchedAlias; +def : InstRW<[WriteI], (instrs COPY)>; + +// MSR +def : WriteRes { let Latency = 1; } + + +// Branch Instructions +// ----------------------------------------------------------------------------- + +// Branch, immed +// Compare and branch +def : SchedAlias; + +// Branch, register +def : SchedAlias; + +// Branch and link, immed +// Branch and link, register +def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>; + +// Compare and branch +def : InstRW<[V1Write_1c_1B], 
(instregex "^[CT]BN?Z[XW]$")>; + + +// Arithmetic and Logical Instructions +// ----------------------------------------------------------------------------- + +// ALU, basic +// Conditional compare +// Conditional select +// Logical, basic +// Address generation +// Count leading +// Reverse bits/bytes +// Move immediate +// Covered by "SchedAlias (WriteI...)" above + +// ALU, basic, flagset +def : InstRW<[V1Write_1c_1J], + (instregex "^(ADD|SUB)S[WX]r[ir]$", + "^(ADC|SBC)S[WX]r$", + "^ANDS[WX]ri$", + "^(AND|BIC)S[WX]rr$")>; + +// ALU, extend and shift +def : SchedAlias; + +// Arithmetic, LSL shift, shift <= 4 +// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 +def V1WriteISReg : SchedWriteVariant< + [SchedVar, + SchedVar]>; +def : SchedAlias; + +// Arithmetic, flagset, LSL shift, shift <= 4 +// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 +def V1WriteISRegS : SchedWriteVariant< + [SchedVar, + SchedVar]>; +def : InstRW<[V1WriteISRegS], + (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>; + +// Logical, shift, no flagset +def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; + +// Logical, shift, flagset +def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>; + +// Flag manipulation instructions +def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>; + + +// Divide and multiply instructions +// ----------------------------------------------------------------------------- + +// Divide +def : SchedAlias; +def : SchedAlias; + +// Multiply +// Multiply accumulate +// Multiply accumulate, long +// Multiply long +def V1WriteIM : SchedWriteVariant< + [SchedVar, + SchedVar]>; +def : SchedAlias; +def : SchedAlias; + +// Multiply high +def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>; + + +// Pointer Authentication Instructions (v8.3 PAC) +// ----------------------------------------------------------------------------- + +// Authenticate data address +// Authenticate instruction address +// 
Compute pointer authentication code for data address +// Compute pointer authentication code, using generic key +// Compute pointer authentication code for instruction address +def : InstRW<[V1Write_5c_1M0], (instregex "^AUT", + "^PAC")>; + +// Branch and link, register, with pointer authentication +// Branch, register, with pointer authentication +// Branch, return, with pointer authentication +def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$", + "^E?RETA[AB]$")>; + +// Load register, with pointer authentication +def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; + +// Strip pointer authentication code +def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>; + + +// Miscellaneous data-processing instructions +// ----------------------------------------------------------------------------- + +// Bitfield extract, one reg +// Bitfield extract, two regs +def V1WriteExtr : SchedWriteVariant< + [SchedVar, + SchedVar]>; +def : SchedAlias; + +// Bitfield move, basic +// Variable shift +def : SchedAlias; + +// Bitfield move, insert +def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>; + +// Move immediate +def : SchedAlias; + + +// Load instructions +// ----------------------------------------------------------------------------- + +// Load register, immed offset +def : SchedAlias; + +// Load register, immed offset, index +def : SchedAlias; +def : SchedAlias; + +// Load pair, immed offset +def : SchedAlias; + +// Load pair, signed immed offset, signed words +def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>; + +// Load pair, immed post or pre-index, signed words +def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z, WriteAdr], + (instrs LDPSWpost, LDPSWpre)>; + + +// Store instructions +// ----------------------------------------------------------------------------- + +// Store register, immed offset +def : SchedAlias; + +// Store register, immed offset, index +def : SchedAlias; + +// Store pair, immed offset 
+def : SchedAlias; + + +// FP data processing instructions +// ----------------------------------------------------------------------------- + +// FP absolute value +// FP arithmetic +// FP min/max +// FP negate +def : SchedAlias; + +// FP compare +def : SchedAlias; + +// FP divide +// FP square root +def : SchedAlias; + +// FP divide, H-form +// FP square root, H-form +def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>; + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>; + +// FP divide, D-form +def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>; + +// FP square root, D-form +def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>; + +// FP multiply +def : SchedAlias; + +// FP multiply accumulate +def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; + +// FP round to integral +def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$", + "^FRINT(32|64)[XZ][SD]r$")>; + +// FP select +def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>; + + +// FP miscellaneous instructions +// ----------------------------------------------------------------------------- + +// FP convert, from gen to vec reg +def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; + +// FP convert, from vec to gen reg +def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; + +// FP convert, Javascript from vec to gen reg +def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>; + +// FP convert, from vec to vec reg +def : SchedAlias; + +// FP move, immed +def : SchedAlias; + +// FP move, register +def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>; + +// FP transfer, from gen to low half of vec reg +def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; + +// FP transfer, from gen to high half of vec reg +def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>; + +// FP transfer, from vec to gen reg +def : SchedAlias; + + +// FP 
load instructions +// ----------------------------------------------------------------------------- + +// Load vector reg, literal, S/D/Q forms +// Load vector reg, unscaled immed +// Load vector reg, unsigned immed +def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$", + "^LDUR[BHSDQ]i$", + "^LDR[BHSDQ]ui$")>; + +// Load vector reg, immed post-index +// Load vector reg, immed pre-index +def : InstRW<[V1Write_6c_1L, WriteAdr], + (instregex "^LDR[BHSDQ](post|pre)$")>; + +// Load vector reg, register offset, basic +// Load vector reg, register offset, scale, S/D-form +// Load vector reg, register offset, extend +// Load vector reg, register offset, extend, scale, S/D-form +def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>; + +// Load vector reg, register offset, scale, H/Q-form +// Load vector reg, register offset, extend, scale, H/Q-form +def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>; + +// Load vector pair, immed offset, S/D-form +def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>; + +// Load vector pair, immed offset, Q-form +def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>; + +// Load vector pair, immed post-index, S/D-form +// Load vector pair, immed pre-index, S/D-form +def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z, WriteAdr], + (instregex "^LDP[SD](pre|post)$")>; + +// Load vector pair, immed post-index, Q-form +// Load vector pair, immed pre-index, Q-form +def : InstRW<[V1Write_6c_1L, WriteLDHi, WriteAdr], + (instrs LDPQpost, LDPQpre)>; + + +// FP store instructions +// ----------------------------------------------------------------------------- + +// Store vector reg, unscaled immed, B/H/S/D/Q-form +def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; + +// Store vector reg, immed post-index, B/H/S/D/Q-form +// Store vector reg, immed pre-index, B/H/S/D/Q-form +def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], + (instregex 
"^STR[BHSDQ](pre|post)$")>; + +// Store vector reg, unsigned immed, B/H/S/D/Q-form +def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>; + +// Store vector reg, register offset, basic, B/S/D-form +// Store vector reg, register offset, scale, B/S/D-form +// Store vector reg, register offset, extend, B/S/D-form +// Store vector reg, register offset, extend, scale, B/S/D-form +def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase], + (instregex "^STR[BSD]ro[WX]$")>; + +// Store vector reg, register offset, basic, H/Q-form +// Store vector reg, register offset, scale, H/Q-form +// Store vector reg, register offset, extend, H/Q-form +// Store vector reg, register offset, extend, scale, H/Q-form +def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase], + (instregex "^STR[HQ]ro[WX]$")>; + +// Store vector pair, immed offset, S/D/Q-form +def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>; + +// Store vector pair, immed post-index, S/D-form +// Store vector pair, immed pre-index, S/D-form +def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], + (instregex "^STP[SD](pre|post)$")>; + +// Store vector pair, immed post-index, Q-form +// Store vector pair, immed pre-index, Q-form +def : InstRW<[V1Write_2c_2L01_1V01, WriteAdr], (instrs STPQpre, STPQpost)>; + + +// ASIMD integer instructions +// ----------------------------------------------------------------------------- + +// ASIMD absolute diff +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD arith, pair-wise +// ASIMD compare +// ASIMD logical +// ASIMD max/min, basic and pair-wise +def : SchedAlias; +def : SchedAlias; + +// ASIMD absolute diff accum +// ASIMD absolute diff accum long +// ASIMD pairwise add and accumulate long +def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>; + +// ASIMD arith, reduce, 4H/4S +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$", + "^[SU](MAX|MIN)Vv4(i16|i32)v$")>; 
+ +// ASIMD arith, reduce, 8B/8H +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$", + "^[SU](MAX|MIN)Vv8(i8|i16)v$")>; + +// ASIMD arith, reduce, 16B +// ASIMD max/min, reduce, 16B +def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$", + "^[SU](MAX|MIN)Vv16i8v$")>; + +// ASIMD dot product +// ASIMD dot product using signed and unsigned integers +def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>; + +// ASIMD matrix multiply-accumulate +def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>; + +// ASIMD multiply +// ASIMD multiply accumulate +// ASIMD multiply accumulate long +// ASIMD multiply accumulate high +// ASIMD multiply accumulate saturating long +// NOTE(review): the anchored MUL/ML[AS] patterns only accept names ending in +// i16 or i32; names ending in i8 (and any suffixed variants) are not matched +// here - confirm that this is intended. +def : InstRW<[V1Write_4c_1V02], + (instregex "^MUL(v[148]i16|v[124]i32)$", + "^SQR?DMULH(v[48]i16|v[24]i32)$", + "^ML[AS](v[148]i16|v[124]i32)$", + "^[SU]ML[AS]Lv", + "^SQRDML[AS]H(v[148]i16|v[124]i32)$", + "^SQDML[AS]Lv")>; + +// ASIMD multiply/multiply long (8x8) polynomial +def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>; + +// ASIMD multiply long +def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>; + +// ASIMD shift accumulate +// ASIMD shift by immed, complex +// ASIMD shift by register, complex +def : InstRW<[V1Write_4c_1V13], + (instregex "^[SU]R?SRAv", + "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$", + "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", + "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", + "^[SU]Q?RSHLv", "^[SU]QSHLv")>; + +// ASIMD shift by immed, basic +// ASIMD shift by immed and insert, basic +// ASIMD shift by register, basic +def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv", + "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>; + + +// ASIMD FP instructions +// ----------------------------------------------------------------------------- + +// ASIMD FP absolute value/difference +// ASIMD FP arith, normal +// ASIMD
FP compare +// ASIMD FP complex add +// ASIMD FP max/min, normal +// ASIMD FP max/min, pairwise +// ASIMD FP negate +// Covered by "SchedAlias (WriteV[dq]...)" above + +// ASIMD FP complex multiply add +// ASIMD FP multiply accumulate +// The complex multiply-add opcodes are FCMLA*; FCADD (complex add) is listed +// above as covered by the SchedAlias and must not be matched here. +def : InstRW<[V1Write_4c_1V], (instregex "^FCMLAv", + "^FML[AS]v")>; + +// ASIMD FP convert, long (F16 to F32) +def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>; + +// ASIMD FP convert, long (F32 to F64) +def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>; + +// ASIMD FP convert, narrow (F32 to F16) +def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>; + +// ASIMD FP convert, narrow (F64 to F32) +def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$", + "^FCVTXN(v[24]f32|v1i64)$")>; + +// ASIMD FP convert, other, D-form F32 and Q-form F64 +def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", + "^[SU]CVTFv2f(32|64)$")>; + +// ASIMD FP convert, other, D-form F16 and Q-form F32 +def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", + "^[SU]CVTFv4f(16|32)$")>; + +// ASIMD FP convert, other, Q-form F16 +def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", + "^[SU]CVTFv8f16$")>; + +// ASIMD FP divide, D-form, F16 +// ASIMD FP square root, D-form, F16 +def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>; + +// ASIMD FP divide, F32 +// ASIMD FP square root, F32 +def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32, + FSQRTv2f32, FSQRTv4f32)>; + +// ASIMD FP divide, Q-form, F16 +def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>; + +// ASIMD FP square root, Q-form, F16 +def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>; + +// ASIMD FP square root, Q-form, F64 +def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>; + +// ASIMD FP max/min, reduce, F32 and D-form F16 +def :
InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>; + +// ASIMD FP max/min, reduce, Q-form F16 +def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>; + +// ASIMD FP multiply +def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>; + +// ASIMD FP multiply accumulate long +def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>; + +// ASIMD FP round, D-form F32 and Q-form F64 +def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>; + +// ASIMD FP round, D-form F16 and Q-form F32 +def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>; + +// ASIMD FP round, Q-form F16 +def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>; + + +// ASIMD BF instructions +// ----------------------------------------------------------------------------- + +// ASIMD convert, F32 to BF16 +def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>; + +// ASIMD dot product +def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>; + +// ASIMD matrix multiply accumulate +def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>; + +// ASIMD multiply accumulate long +def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>; + +// Scalar convert, F32 to BF16 +def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>; + + +// ASIMD miscellaneous instructions +// ----------------------------------------------------------------------------- + +// ASIMD bit reverse +// ASIMD bitwise insert +// ASIMD count +// ASIMD duplicate, element +// ASIMD extract +// ASIMD extract narrow +// ASIMD insert, element to element +// ASIMD move, FP immed +// ASIMD move, integer immed +// ASIMD reverse +// ASIMD table lookup, 1 or 2 table regs +// ASIMD table lookup extension, 1 table reg +// ASIMD transfer, element to gen reg +// ASIMD transpose +// ASIMD unzip/zip +// Covered by "SchedAlias (WriteV[dq]...)" above +// NOTE(review): the table lookup (1 or 2 table regs, extension with 1 table +// reg) and element-to-gen-reg transfer categories also receive explicit InstRW +// entries further down in this section - confirm which one is intended. + +// ASIMD duplicate, gen reg +def : InstRW<[V1Write_3c_1M0], + (instregex
"^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>; + +// ASIMD extract narrow, saturating +def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>; + +// ASIMD reciprocal and square root estimate, D-form U32 +// ASIMD reciprocal and square root estimate, D-form F32 and F64 +def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32, + FRECPEv1i32, FRECPEv2f32, FRECPEv1i64, + URSQRTEv2i32, + FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>; + +// ASIMD reciprocal and square root estimate, Q-form U32 +// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 +def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32, + FRECPEv1f16, FRECPEv4f16, FRECPEv4f32, + URSQRTEv4i32, + FRSQRTEv1f16, FRSQRTEv4f16, FRSQRTEv4f32)>; + +// ASIMD reciprocal and square root estimate, Q-form F16 +def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16, + FRSQRTEv8f16)>; + +// ASIMD reciprocal exponent +def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>; + +// ASIMD reciprocal step +def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv", + "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>; + +// ASIMD table lookup, 1 or 2 table regs +// ASIMD table lookup extension, 1 table reg +// NOTE(review): these two categories are also named in the "Covered by +// SchedAlias" list at the top of this section; this explicit entry matches the +// One/Two TBL and One TBX opcodes. +def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$", + "^TBXv(8|16)i8One$")>; + +// ASIMD table lookup, 3 table regs +// ASIMD table lookup extension, 2 table reg +def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three, + TBXv8i8Two, TBXv16i8Two)>; + +// ASIMD table lookup, 4 table regs +def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>; + +// ASIMD table lookup extension, 3 table reg +def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>; + +// ASIMD table lookup extension, 4 table reg +def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>; + +// ASIMD transfer, element to gen reg +def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$", + "^UMOVvi(8|16|32|64)$")>; + +//
ASIMD transfer, gen reg to element +def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; + + +// ASIMD load instructions +// ----------------------------------------------------------------------------- +// Each category below is a pair of entries: the plain opcodes, then the _POST +// opcodes with WriteAdr appended (presumably for the base-register writeback - +// TODO confirm). + +// ASIMD load, 1 element, multiple, 1 reg +def : InstRW<[V1Write_6c_1L], + (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_6c_1L, WriteAdr], + (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg +def : InstRW<[V1Write_6c_2L], + (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_6c_2L, WriteAdr], + (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg +def : InstRW<[V1Write_6c_3L], + (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_6c_3L, WriteAdr], + (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +def : InstRW<[V1Write_6c_2L], + (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[V1Write_6c_2L, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[V1Write_7c_4L], + (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[V1Write_7c_4L, WriteAdr], + (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane +// ASIMD load, 1 element, all lanes +// NOTE(review): in the first pattern the "Rv" alternative only accepts names of +// the form LD1Rv<bits>; the arrangement-suffixed LD1Rv names are matched by the +// second pattern. +def : InstRW<[V1Write_8c_1L_1V], + (instregex "^LD1(i|Rv)(8|16|32|64)$", + "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_8c_1L_1V, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$", + "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form +def : InstRW<[V1Write_8c_1L_2V], + (instregex "^LD2Twov(8b|4h|2s)$")>; +def : InstRW<[V1Write_8c_1L_2V, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s)_POST$")>; + +// ASIMD load, 2 element, multiple, Q-form +def : InstRW<[V1Write_8c_2L_2V], + (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def
: InstRW<[V1Write_8c_2L_2V, WriteAdr], + (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane +// ASIMD load, 2 element, all lanes +def : InstRW<[V1Write_8c_1L_2V], + (instregex "^LD2i(8|16|32|64)$", + "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_8c_1L_2V, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$", + "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form +// ASIMD load, 3 element, one lane +// ASIMD load, 3 element, all lanes +def : InstRW<[V1Write_8c_2L_3V], + (instregex "^LD3Threev(8b|4h|2s)$", + "^LD3i(8|16|32|64)$", + "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_8c_2L_3V, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s)_POST$", + "^LD3i(8|16|32|64)_POST$", + "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, Q-form +// The 2d arrangement is included, matching the LD2/LD4 Q-form entries. +def : InstRW<[V1Write_8c_3L_3V], + (instregex "^LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[V1Write_8c_3L_3V, WriteAdr], + (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form +// ASIMD load, 4 element, one lane +// ASIMD load, 4 element, all lanes +def : InstRW<[V1Write_8c_3L_4V], + (instregex "^LD4Fourv(8b|4h|2s)$", + "^LD4i(8|16|32|64)$", + "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[V1Write_8c_3L_4V, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s)_POST$", + "^LD4i(8|16|32|64)_POST$", + "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, Q-form +def : InstRW<[V1Write_9c_4L_4V], + (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[V1Write_9c_4L_4V, WriteAdr], + (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; + + +// ASIMD store instructions +// ----------------------------------------------------------------------------- + +// ASIMD store, 1 element, multiple, 1 reg +// ASIMD store, 1 element, multiple, 2 reg, D-form +def : InstRW<[V1Write_2c_1L01_1V01], + (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$", + "^ST1Twov(8b|4h|2s|1d)$")>; +def
: InstRW<[V1Write_2c_1L01_1V01, WriteAdr], + (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$", + "^ST1Twov(8b|4h|2s|1d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, Q-form +// ASIMD store, 1 element, multiple, 3 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, D-form +// As in the other store categories, the first entry matches the plain opcodes +// and the second the _POST opcodes, with WriteAdr appended. +def : InstRW<[V1Write_2c_2L01_2V01], + (instregex "^ST1Twov(16b|8h|4s|2d)$", + "^ST1Threev(8b|4h|2s|1d)$", + "^ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr], + (instregex "^ST1Twov(16b|8h|4s|2d)_POST$", + "^ST1Threev(8b|4h|2s|1d)_POST$", + "^ST1Fourv(8b|4h|2s|1d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, Q-form +def : InstRW<[V1Write_2c_3L01_3V01], + (instregex "^ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[V1Write_2c_3L01_3V01, WriteAdr], + (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, Q-form +def : InstRW<[V1Write_2c_4L01_4V01], + (instregex "^ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[V1Write_2c_4L01_4V01, WriteAdr], + (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane +// ASIMD store, 2 element, multiple, D-form +// ASIMD store, 2 element, one lane +def : InstRW<[V1Write_4c_1L01_1V01], + (instregex "^ST1i(8|16|32|64)$", + "^ST2Twov(8b|4h|2s)$", + "^ST2i(8|16|32|64)$")>; +def : InstRW<[V1Write_4c_1L01_1V01, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$", + "^ST2Twov(8b|4h|2s)_POST$", + "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, Q-form +// ASIMD store, 3 element, multiple, D-form +// ASIMD store, 3 element, one lane +// ASIMD store, 4 element, one lane, D +def : InstRW<[V1Write_4c_2L01_2V01], + (instregex "^ST2Twov(16b|8h|4s|2d)$", + "^ST3Threev(8b|4h|2s)$", + "^ST3i(8|16|32|64)$", + "^ST4i64$")>; +def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr], + (instregex "^ST2Twov(16b|8h|4s|2d)_POST$", + "^ST3Threev(8b|4h|2s)_POST$", + "^ST3i(8|16|32|64)_POST$", + "^ST4i64_POST$")>; + +// ASIMD store, 3 element, multiple, Q-form +def :
InstRW<[V1Write_5c_3L01_3V01], + (instregex "^ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[V1Write_5c_3L01_3V01, WriteAdr], + (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form +def : InstRW<[V1Write_6c_3L01_3V01], + (instregex "^ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; + +// ASIMD store, 4 element, multiple, Q-form, B/H/S +def : InstRW<[V1Write_7c_6L01_6V01], + (instregex "^ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[V1Write_7c_6L01_6V01, WriteAdr], + (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; + +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[V1Write_4c_4L01_4V01], + (instrs ST4Fourv2d)>; +def : InstRW<[V1Write_4c_4L01_4V01, WriteAdr], + (instrs ST4Fourv2d_POST)>; + +// ASIMD store, 4 element, one lane, B/H/S +// Uses the L01/V01 write like every other ASIMD store in this section; the +// plain L/V writes are otherwise only used by loads. +def : InstRW<[V1Write_6c_3L01_3V01], + (instregex "^ST4i(8|16|32)$")>; +def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr], + (instregex "^ST4i(8|16|32)_POST$")>; + + +// Cryptography extensions +// ----------------------------------------------------------------------------- + +// Crypto polynomial (64x64) multiply long +// Covered by "SchedAlias (WriteV[dq]...)" above + +// Crypto AES ops +def V1WriteVC : WriteSequence<[V1Write_2c_1V]>; +def V1ReadVC : SchedReadAdvance<2, [V1WriteVC]>; +def : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>; +def : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>; + +// Crypto SHA1 hash acceleration op +// Crypto SHA1 schedule acceleration ops +// Crypto SHA256 schedule acceleration ops +// Crypto SHA512 hash acceleration ops +// Crypto SM3 ops +// The SM3 pattern matches SM3PARTW1, SM3PARTW2, SM3SS1 and SM3TT{1,2}{A,B}. +def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$", + "^SHA256SU[01]rr$", + "^SHA512(H2?|SU[01])$", + "^SM3(PARTW[12]|SS1|TT[12][AB])$")>; + +// Crypto SHA1 hash acceleration ops +// Crypto SHA256 hash acceleration ops +// Crypto SM4 ops +// The SM4 pattern matches SM4E and SM4ENCKEY. +def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$", + "^SHA256H2?rrr$", + "^SM4E(NCKEY)?$")>; + +//
Crypto SHA3 ops +def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; + + +// CRC instruction +// ----------------------------------------------------------------------------- + +// CRC checksum ops +def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>; + + +// SVE Predicate instructions +// ----------------------------------------------------------------------------- +// NOTE(review): every write used in this section names the M0 resource; only +// the vector-form INC/DEC-by-predicate entry additionally names V01. + +// Loop control, based on predicate +def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>; +def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; + +// Loop control, based on predicate and flag setting +def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP, + BRKPAS_PPzPP, BRKPBS_PPzPP)>; + +// Loop control, based on GPR +def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; + +// Loop terminate +def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; + +// Predicate counting scalar +def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; +def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$", + "^SQ(DEC|INC)[BHWD]_XPiWdI$", + "^UQ(DEC|INC)[BHWD]_WPiI$")>; + +// Predicate counting scalar, active predicate +def : InstRW<[V1Write_2c_1M0], (instregex "^CNTP_XPP_[BHSD]$", + "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$", + "^UQ(DEC|INC)P_WP_[BHSD]$", + "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>; + +// Predicate counting vector, active predicate +def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>; + +// Predicate logical +def : InstRW<[V1Write_1c_1M0], + (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; + +// Predicate logical, flag setting +def : InstRW<[V1Write_2c_2M0], + (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>; + +// Predicate reverse +def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$")>; + +// Predicate select +def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>; + +// Predicate set/initialize/find next
+def : InstRW<[V1Write_2c_1M0], (instregex "^PFALSE$", "^PFIRST_B$", + "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$")>; +def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP)>; + +// Predicate set/initialize, set flags +def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>; + +// Predicate transpose +def : InstRW<[V1Write_2c_1M0], (instregex "^TRN[12]_PPP_[BHSDQ]$")>; + +// Predicate unpack and widen +def : InstRW<[V1Write_2c_1M0], (instrs PUNPKHI_PP, PUNPKLO_PP)>; + +// Predicate zip/unzip +def : InstRW<[V1Write_2c_1M0], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; + + +// SVE integer instructions +// ----------------------------------------------------------------------------- +// NOTE(review): the write names read as V1Write_<N>c_<count><resource>...; +// presumably <N> is the latency in cycles and each <count><resource> pair is a +// number of micro-ops on that pipeline group - confirm against the write +// definitions earlier in this file. + +// Arithmetic, basic +def : InstRW<[V1Write_2c_1V01], + (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$", + "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$", + "^ADR_[SU]XTW_ZZZ_D_[0123]$", + "^ADR_LSL_ZZZ_[SD]_[0123]$", + "^[SU]ABD_ZP[mZ]Z_[BHSD]$", + "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$", + "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$", + "^SUBR_Z(I|P[mZ]Z)_[BHSD]$")>; + +// Arithmetic, shift +def : InstRW<[V1Write_2c_1V1], + (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]$", + "^(ASR|LSL|LSR)_Z[PZ]I_[BHSD]$", + "^(ASR|LSL|LSR)_ZP[mZ][IZ]_[BHSD]$", + "^(ASR|LSL|LSR)_ZPZ[IZ]_ZERO_[BHSD]$", + "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>; + +// Arithmetic, shift right for divide +def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>; + +// Count/reverse bits +def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; + +// Broadcast logical bitmask immediate to vector +def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>; + +// Compare and set flags +def : InstRW<[V1Write_4c_1M0_1V0], + (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", + "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; + +// Conditional extract operations, scalar form +def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; + +// Conditional extract operations, SIMD&FP scalar and vector forms +def
: InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", + "^COMPACT_ZPZ_[SD]$", + "^SPLICE_ZPZZ?_[BHSD]$")>; + +// Convert to floating point, 64b to float or convert to double +def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>; + +// Convert to floating point, 32b to single or half +def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; + +// Convert to floating point, 16b to half +def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; + +// Copy, scalar +def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; + +// Copy, scalar SIMD&FP or imm +def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>; + +// Divides, 32 bit +def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; + +// Divides, 64 bit +def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; + +// Dot product, 8 bit +def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>; + +// Dot product, 8 bit, using signed and unsigned integers +// NOTE(review): the plain [SU]DOT entries use a V01 write while these +// mixed-sign forms use a V write - confirm against the optimization guide. +def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>; + +// Dot product, 16 bit +def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>; + +// Duplicate, immediate and indexed form +def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$", + "^DUP_ZZI_[BHSDQ]$")>; + +// Duplicate, scalar form +def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>; + +// Extend, sign or zero +def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", + "^[SU]XTH_ZPmZ_[SD]$", + "^[SU]XTW_ZPmZ_[D]$")>; + +// Extract +def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>; + +// Extract/insert operation, SIMD and FP scalar form +def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", + "^INSR_ZV_[BHSD]$")>; + +// Extract/insert operation, scalar +def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$", + "^INSR_ZR_[BHSD]$")>; + +// Horizontal operations, B, H, S form, imm, imm +def :
InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>; + +// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar +def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; + +// Horizontal operations, D form, imm, imm +def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>; + +// Horizontal operations, D form, scalar, imm / scalar / imm, scalar +def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>; + +// Logical +def : InstRW<[V1Write_2c_1V], + (instregex "^(AND|EOR|ORR)_ZI$", + "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", + "^EOR(BT|TB)_ZZZ_[BHSD]$", + "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; + +// Move prefix +def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", + "^MOVPRFX_ZZ$")>; + +// Matrix multiply-accumulate +def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; + +// Multiply, B, H, S element size +def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$", + "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; + +// Multiply, D element size +def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$", + "^[SU]MULH_ZPmZ_D$")>; + +// Multiply accumulate, D element size +// NOTE(review): there is no matching entry here for the B/H/S element sizes of +// MLA/MLS/MAD/MSB - confirm whether that is intentional. +def : InstRW<[V1Write_5c_2V0], (instregex "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>; + +// Predicate counting vector +def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; + +// Reduction, arithmetic, B form +// NOTE(review): the three arithmetic reduction patterns have no trailing "$", +// and no entry here covers the D form - confirm both. +def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13], + (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; + +// Reduction, arithmetic, H form +def : InstRW<[V1Write_12c_1V_1V01_2V1], + (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; + +// Reduction, arithmetic, S form +def : InstRW<[V1Write_10c_1V_1V01_2V1], + (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; + +// Reduction, logical +def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; + +// Reverse, vector +def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$", + "^REVB_ZPmZ_[HSD]$", + "^REVH_ZPmZ_[SD]$", + "^REVW_ZPmZ_D$")>; + +// Select, vector
form +def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$")>; + +// Table lookup +// Table lookup extension +def : InstRW<[V1Write_2c_1V01], (instregex "^TB[LX]_ZZZ_[BHSD]$")>; + +// Transpose, vector form +def : InstRW<[V1Write_2c_1V01], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; + +// Unpack and extend +def : InstRW<[V1Write_2c_1V01], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; + +// Zip/unzip +def : InstRW<[V1Write_2c_1V01], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; + + +// SVE floating-point instructions +// ----------------------------------------------------------------------------- + +// Floating point absolute value/difference +def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>; + +// Floating point arithmetic +def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", + "^FADDP_ZPmZZ_[HSD]$", + "^FNEG_ZPmZ_[HSD]$", + "^FSUBR_ZPm[IZ]_[HSD]$")>; + +// Floating point associative add, F16 +def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>; + +// Floating point associative add, F32 +def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>; + +// Floating point associative add, F64 +// NOTE(review): the F16 and F32 forms above use V0 writes while this F64 form +// uses a V01 write - confirm against the optimization guide. +def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>; + +// Floating point compare +def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$", + "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$", + "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>; + +// Floating point complex add +def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>; + +// Floating point complex multiply add +def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$", + "^FCMLA_ZZZI_[HS]$")>; + +// Floating point convert, long or narrow (F16 to F32 or F32 to F16) +def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$")>; + +// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) +def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$")>; + +// Floating point convert to integer, F16 +def :
InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; + +// Floating point convert to integer, F32 +// NOTE(review): the HtoS form is grouped with F32 here and the HtoD/StoD forms +// with F64 below, i.e. the grouping is not purely by source type - confirm. +def : InstRW<[V1Write_4c_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; + +// Floating point convert to integer, F64 +def : InstRW<[V1Write_3c_1V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; + +// Floating point copy +def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$", + "^FDUP_ZI_[HSD]$")>; + +// Floating point divide, F16 +def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; + +// Floating point divide, F32 +def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; + +// Floating point divide, F64 +def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; + +// Floating point min/max +def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; + +// Floating point multiply +def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$", + "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; + +// Floating point multiply accumulate +def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", + "^FML[AS]_ZZZI_[HSD]$")>; + +// Floating point reciprocal estimate, F16 +def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>; + +// Floating point reciprocal estimate, F32 +def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>; + +// Floating point reciprocal estimate, F64 +def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>; + +// Floating point reciprocal exponent +def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>; + +// Floating point reciprocal step +def : InstRW<[V1Write_4c_1V01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; + +// Floating point reduction, F16 +def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>; + +// Floating point reduction, F32 +def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>; + +// Floating point reduction, F64 +def :
InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>; + +// Floating point round to integral, F16 +// NOTE(review): the F16 and F32 round-to-integral entries name a single V0 +// micro-op (6c_1V0, 4c_1V0), whereas the ASIMD Q-form F16 round entry earlier +// in this file names four (6c_4V02) - confirm the micro-op counts. +def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; + +// Floating point round to integral, F32 +def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; + +// Floating point round to integral, F64 +def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>; + +// Floating point square root, F16 +def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>; + +// Floating point square root, F32 +def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>; + +// Floating point square root, F64 +def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>; + +// Floating point trigonometric +def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$", + "^FTMAD_ZZI_[HSD]$", + "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; + + +// SVE BFloat16 (BF16) instructions +// ----------------------------------------------------------------------------- + +// Convert, F32 to BF16 +def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; + +// Dot product +def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; + +// Matrix multiply accumulate +def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>; + +// Multiply accumulate long +def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; + + +// SVE Load instructions +// ----------------------------------------------------------------------------- + +// Load vector +def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>; + +// Load predicate +def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>; + +// Contiguous load, scalar + imm +def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM_REAL$", + "^LD1S?B_[HSD]_IMM_REAL$", + "^LD1S?H_[SD]_IMM_REAL$", + "^LD1S?W_D_IMM_REAL$" )>; +// Contiguous load, scalar + scalar +// The H-element forms get a separate write that also names the S resource. +def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$", + "^LD1S?H_[SD]$")>; +def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BWD]$", + "^LD1S?B_[HSD]$", + 
"^LD1S?W_D$")>; + +// Contiguous load broadcast, scalar + imm +def : InstRW<[V1Write_6c_1L01], (instregex "^LD1R[BHWD]_IMM$", + "^LD1RSW_IMM$", + "^LD1RS?B_[HSD]_IMM$", + "^LD1RS?H_[SD]_IMM$", + "^LD1RS?W_D_IMM$", + "^LD1RQ_[BHWD]_IMM$")>; + +// Contiguous load broadcast, scalar + scalar +def : InstRW<[V1Write_7c_1L01_1S], (instrs LD1RQ_H)>; +def : InstRW<[V1Write_6c_1L01], (instregex "^LD1RQ_[BWD]$")>; + +// Non temporal load, scalar + imm +def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>; + +// Non temporal load, scalar + scalar +// NOTE(review): the B/W/D forms here (and in the first-faulting entry below) +// use V1Write_6c_1L01_1S, whereas the B/W/D broadcast scalar + scalar entry +// above uses V1Write_6c_1L01 without the S resource - confirm which is right. +def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>; +def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>; + +// Contiguous first faulting load, scalar + scalar +def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$", + "^LDFF1S?H_[SD]_REAL$")>; +def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$", + "^LDFF1S?B_[HSD]_REAL$", + "^LDFF1S?W_D_REAL$")>; + +// Contiguous non faulting load, scalar + imm +def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$", + "^LDNF1S?B_[HSD]_IMM_REAL$", + "^LDNF1S?H_[SD]_IMM_REAL$", + "^LDNF1S?W_D_IMM_REAL$")>; + +// Contiguous Load two structures to two vectors, scalar + imm +def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>; + +// Contiguous Load two structures to two vectors, scalar + scalar +def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>; +def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>; + +// Contiguous Load three structures to three vectors, scalar + imm +def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>; + +// Contiguous Load three structures to three vectors, scalar + scalar +def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>; + +// Contiguous Load four structures to four vectors, scalar + imm +def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>; + +// Contiguous Load four structures to four vectors, scalar + scalar +def :
InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>; + +// Gather load, vector + imm, 32-bit element size +def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", + "^GLD(FF)?1W_IMM_REAL$")>; + +// Gather load, vector + imm, 64-bit element size +def : InstRW<[V1Write_9c_2L_2V], + (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", + "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$", + "^GLD(FF)?1D_IMM_REAL$", + "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>; + +// Gather load, 32-bit scaled offset +// Both patterns are fully anchored, like the other gather-load entries. +def : InstRW<[V1Write_11c_2L_2V], + (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$", + "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>; + +// Gather load, 32-bit unpacked unscaled offset +def : InstRW<[V1Write_9c_1L_1V], + (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", + "^GLD(FF)?1W_[SU]XTW_REAL$")>; + + +// SVE Store instructions +// ----------------------------------------------------------------------------- + +// Store from predicate reg +def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>; + +// Store from vector reg +def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>; + +// Contiguous store, scalar + imm +def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$", + "^ST1B_[HSD]_IMM$", + "^ST1H_[SD]_IMM$", + "^ST1W_D_IMM$")>; + +// Contiguous store, scalar + scalar +def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>; +def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BWD]$", + "^ST1B_[HSD]$", + "^ST1W_D$")>; + +// Contiguous store two structures from two vectors, scalar + imm +def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>; + +// Contiguous store two structures from two vectors, scalar + scalar +def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>; +def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BWD]$")>; + +// Contiguous store three structures from three vectors, scalar + imm +def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>; + +// Contiguous store three structures from three vectors,
scalar + scalar +def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>; + +// Contiguous store four structures from four vectors, scalar + imm +def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>; + +// Contiguous store four structures from four vectors, scalar + scalar +def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>; + +// Non-temporal store, scalar + imm +def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; + +// Non-temporal store, scalar + scalar +def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>; +def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; + +// Scatter store, vector + imm, 32-bit element size +def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$", + "^SST1W_IMM$")>; + +// Scatter store, vector + imm, 64-bit element size +def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$", + "^SST1D_IMM$")>; + +// Scatter store, 32-bit scaled offset +def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>; + +// Scatter store, 32-bit unpacked unscaled offset +def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$", + "^SST1D_[SU]XTW$")>; + +// Scatter store, 32-bit unpacked scaled offset +def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", + "^SST1D_[SU]XTW_SCALED$")>; + +// Scatter store, 32-bit unscaled offset +def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$", + "^SST1W_[SU]XTW$")>; + +// Scatter store, 64-bit scaled offset +def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[HW]_D_SCALED$", + "^SST1D_SCALED$")>; + +// Scatter store, 64-bit unscaled offset +def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D$", + "^SST1D$")>; + + +// SVE Miscellaneous instructions +// ----------------------------------------------------------------------------- + +// Read first fault register, unpredicated +def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL)>; + +// Read first fault
register, predicated +def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>; + +// Read first fault register and set flags +def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>; + +// Set first fault register +// Write to first fault register +def : InstRW<[V1Write_2c_1M0], (instrs SETFFR, WRFFR)>; + + +} Index: llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -948,8 +948,8 @@ def V2Wr_IM : SchedWriteRes<[V2UnitM]> { let Latency = 2; } def V2Wr_IMA : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } def V2Wr_IMUL : SchedWriteVariant<[ - SchedVar, - SchedVar]>; + SchedVar, + SchedVar]>; def V2Rd_IMA : SchedReadAdvance<1, [V2Wr_IMA]>; def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } Index: llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td +++ llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td @@ -45,3 +45,12 @@ CheckImmOperand<3, 1>, CheckImmOperand<3, 2>, CheckImmOperand<3, 4>]>]>>; + +// Identify "[SU]?(MADD|MSUB)L?" used as the alias for "[SU]?(MUL|MNEG)L?": one of the opcodes listed below that also passes CheckIsReg3Zero. +def NeoverseMULIdiomPred : MCSchedPredicate< // Rd, Rs, Rv, ZR + CheckAll<[CheckOpcode< + [MADDWrrr, MADDXrrr, + MSUBWrrr, MSUBXrrr, + SMADDLrrr, UMADDLrrr, + SMSUBLrrr, UMSUBLrrr]>, + CheckIsReg3Zero]>>;