Index: llvm/trunk/include/llvm/Support/AArch64TargetParser.def =================================================================== --- llvm/trunk/include/llvm/Support/AArch64TargetParser.def +++ llvm/trunk/include/llvm/Support/AArch64TargetParser.def @@ -73,8 +73,9 @@ (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) AARCH64_CPU_NAME("kryo", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) -AARCH64_CPU_NAME("vulcan", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) +AARCH64_CPU_NAME("thunderx2t99", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_CRC | + AArch64::AEK_CRYPTO)) AARCH64_CPU_NAME("thunderx", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt88", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, Index: llvm/trunk/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64.td +++ llvm/trunk/lib/Target/AArch64/AArch64.td @@ -161,7 +161,7 @@ include "AArch64SchedKryo.td" include "AArch64SchedM1.td" include "AArch64SchedThunderX.td" -include "AArch64SchedVulcan.td" +include "AArch64SchedThunderX2T99.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -288,16 +288,18 @@ FeatureZCZeroing ]>; -def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", - "Broadcom Vulcan processors", [ - FeatureCRC, - FeatureCrypto, - FeatureFPARMv8, - FeatureArithmeticBccFusion, - FeatureNEON, - FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive, - HasV8_1aOps]>; +def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", + "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureCRC, + FeatureCrypto, + 
FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + HasV8_1aOps]>; def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", "Cavium ThunderX processors", [ @@ -363,12 +365,13 @@ def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>; def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; -def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>; // Cavium ThunderX/ThunderX T8X Processors def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>; def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>; def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; //===----------------------------------------------------------------------===// // Assembly parser Index: llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX.td +++ llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX.td @@ -23,6 +23,7 @@ let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order. let LoadLatency = 3; // Optimistic load latency. let MispredictPenalty = 8; // Branch mispredict penalty. + let PostRAScheduler = 1; // Use PostRA scheduler. 
let CompleteModel = 1; } Index: llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ llvm/trunk/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -0,0 +1,852 @@ +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for Cavium ThunderX2T99 +// processors. +// Based on Broadcom Vulcan. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 2. Pipeline Description. + +def ThunderX2T99Model : SchedMachineModel { + let IssueWidth = 4; // 4 micro-ops dispatched at a time. + let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 12; // Extra cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 32; + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; +} + +// Define the issue ports. + +// Port 0: ALU, FP/SIMD. +def THX2T99P0 : ProcResource<1>; + +// Port 1: ALU, FP/SIMD, integer mul/div. +def THX2T99P1 : ProcResource<1>; + +// Port 2: ALU, Branch. +def THX2T99P2 : ProcResource<1>; + +// Port 3: Store data. +def THX2T99P3 : ProcResource<1>; + +// Port 4: Load/store. +def THX2T99P4 : ProcResource<1>; + +// Port 5: Load/store. +def THX2T99P5 : ProcResource<1>; + +let SchedModel = ThunderX2T99Model in { + +// Define groups for the functional units on each issue port. 
Each group +// created will be used by a WriteRes later on. +// +// NOTE: Some groups only contain one member. This is a way to create names for +// the various functional units that share a single issue port. For example, +// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1. + +// Integer divide and multiply micro-ops only on port 1. +def THX2T99I1 : ProcResGroup<[THX2T99P1]>; + +// Branch micro-ops only on port 2. +def THX2T99I2 : ProcResGroup<[THX2T99P2]>; + +// ALU micro-ops on ports 0, 1, and 2. +def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>; + +// Crypto FP/SIMD micro-ops only on port 1. +def THX2T99F1 : ProcResGroup<[THX2T99P1]>; + +// FP/SIMD micro-ops on ports 0 and 1. +def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>; + +// Store data micro-ops only on port 3. +def THX2T99SD : ProcResGroup<[THX2T99P3]>; + +// Load/store micro-ops on ports 4 and 5. +def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; + +// 60 entry unified scheduler. +def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, + THX2T99P3, THX2T99P4, THX2T99P5]> { + let BufferSize=60; +} + +// Define commonly used write types for InstRW specializations. +// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>. + +// 3 cycles on I1. +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; } + +// 4 cycles on I1. +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; } + +// 1 cycle on I0, I1, or I2. +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; } + +// 5 cycles on F1. +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; } + +// 7 cycles on F1. +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; } + +// 4 cycles on F0 or F1. +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; } + +// 5 cycles on F0 or F1. 
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; } + +// 6 cycles on F0 or F1. +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; } + +// 7 cycles on F0 or F1. +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; } + +// 8 cycles on F0 or F1. +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; } + +// 16 cycles on F0 or F1. +def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; +} + +// 23 cycles on F0 or F1. +def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [11]; +} + +// 1 cycle on LS0 or LS1. +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; } + +// 4 cycles on LS0 or LS1. +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; } + +// 5 cycles on LS0 or LS1. +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; } + +// 6 cycles on LS0 or LS1. +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; } + +// 5 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 1 cycle on LS0 or LS1 and F0 or F1. +def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 5 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 6 cycles on LS0 or LS1 and F0 or F1. 
+def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 2; +} + +// 7 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 2; +} + +// 8 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 2; +} + +// Define commonly used read types. + +// No forwarding is provided for these types. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +} + + +//===----------------------------------------------------------------------===// +// 3. Instruction Tables. + +let SchedModel = ThunderX2T99Model in { + +//--- +// 3.1 Branch Instructions +//--- + +// Branch, immed +// Branch and link, immed +// Compare and branch +def : WriteRes { let Latency = 1; } + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { let Unsupported = 1; } + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes { let Latency = 1; } + +//--- +// 3.2 Arithmetic and Logical Instructions +// 3.3 Move and Shift Instructions +//--- + +// ALU, basic +// Conditional compare +// Conditional select +// Address generation +def : WriteRes { let Latency = 1; } +def : InstRW<[WriteI], (instrs COPY)>; + +// ALU, extend and/or shift +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +// Move immed +def : WriteRes { let Latency = 1; } + +// Variable shift +def : WriteRes { let Latency = 1; } + +//--- +// 3.4 Divide and Multiply Instructions +//--- + +// Divide, W-form +// Latency range of 13-23. Take the average. 
+def : WriteRes { + let Latency = 18; + let ResourceCycles = [18]; +} + +// Divide, X-form +// Latency range of 13-39. Take the average. +def : WriteRes { + let Latency = 26; + let ResourceCycles = [26]; +} + +// Multiply accumulate, W-form +def : WriteRes { let Latency = 5; } + +// Multiply accumulate, X-form +def : WriteRes { let Latency = 5; } + +// Bitfield extract, two reg +def : WriteRes { let Latency = 1; } + +// Bitfield move, basic +// Bitfield move, insert +// NOTE: Handled by WriteIS. + +// Count leading +def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", + "^CLZ(W|X)r$")>; + +// Reverse bits/bytes +// NOTE: Handled by WriteI. + +//--- +// 3.6 Load Instructions +// 3.10 FP Load Instructions +//--- + +// Load register, literal +// Load register, unscaled immed +// Load register, immed unprivileged +// Load register, unsigned immed +def : WriteRes { let Latency = 4; } + +// Load register, immed post-index +// NOTE: Handled by WriteLD, WriteI. +// Load register, immed pre-index +// NOTE: Handled by WriteLD, WriteAdr. +def : WriteRes { let Latency = 1; } + +// Load register offset, basic +// Load register, register offset, scale by 4/8 +// Load register, register offset, scale by 2 +// Load register offset, extend +// Load register, register offset, extend, scale by 4/8 +// Load register, register offset, extend, scale by 2 +def THX2T99WriteLDIdx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +def THX2T99ReadAdrBase : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. 
+def : WriteRes { + let Latency = 5; +} + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. + +//-- +// 3.7 Store Instructions +// 3.11 FP Store Instructions +//-- + +// Store register, unscaled immed +// Store register, immed unprivileged +// Store register, unsigned immed +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store register, immed post-index +// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteAdr, WriteST + +// Store register, register offset, basic +// Store register, register offset, scaled by 4/8 +// Store register, register offset, scaled by 2 +// Store register, register offset, extend +// Store register, register offset, extend, scale by 4/8 +// Store register, register offset, extend, scale by 1 +def : WriteRes { + let Latency = 1; + let NumMicroOps = 3; +} + +// Store pair, immed offset, W-form +// Store pair, immed offset, X-form +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed post-index, W-form +// Store pair, immed post-index, X-form +// Store pair, immed pre-index, W-form +// Store pair, immed pre-index, X-form +// NOTE: Handled by WriteAdr, WriteSTP. 
+ +//--- +// 3.8 FP Data Processing Instructions +//--- + +// FP absolute value +// FP min/max +// FP negate +def : WriteRes { let Latency = 5; } + +// FP arithmetic +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; + +// FP compare +def : WriteRes { let Latency = 5; } + +// FP divide, S-form +// FP square root, S-form +def : WriteRes { + let Latency = 16; + let ResourceCycles = [8]; +} + +// FP divide, D-form +// FP square root, D-form +def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; + +// FP multiply +// FP multiply accumulate +def : WriteRes { let Latency = 6; } + +// FP round to integral +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// FP select +def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; + +//--- +// 3.9 FP Miscellaneous Instructions +//--- + +// FP convert, from vec to vec reg +// FP convert, from gen to vec reg +// FP convert, from vec to gen reg +def : WriteRes { let Latency = 7; } + +// FP move, immed +// FP move, register +def : WriteRes { let Latency = 4; } + +// FP transfer, from gen to vec reg +// FP transfer, from vec to gen reg +def : WriteRes { let Latency = 4; } +def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; + +//--- +// 3.12 ASIMD Integer Instructions +//--- + +// ASIMD absolute diff, D-form +// ASIMD absolute diff, Q-form +// ASIMD absolute diff accum, D-form +// ASIMD absolute diff accum, Q-form +// ASIMD absolute diff accum long +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD compare +// ASIMD logical (AND, BIC, EOR) +// ASIMD max/min, basic +// ASIMD max/min, reduce, 4H/4S +// ASIMD max/min, reduce, 8B/8H +// ASIMD max/min, reduce, 16B +// ASIMD multiply, D-form +// ASIMD multiply, Q-form +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +// ASIMD multiply long +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +// ASIMD shift by 
immed, basic +// ASIMD shift by immed and insert, basic, D-form +// ASIMD shift by immed and insert, basic, Q-form +// ASIMD shift by immed, complex +// ASIMD shift by register, basic, D-form +// ASIMD shift by register, basic, Q-form +// ASIMD shift by register, complex, D-form +// ASIMD shift by register, complex, Q-form +def : WriteRes { let Latency = 7; } + +// ASIMD arith, reduce, 4H/4S +// ASIMD arith, reduce, 8B/8H +// ASIMD arith, reduce, 16B +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; + +// ASIMD logical (MOV, MVN, ORN, ORR) +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; + +// ASIMD polynomial (8x8) multiply long +def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; + +//--- +// 3.13 ASIMD Floating-point Instructions +//--- + +// ASIMD FP absolute value +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; + +// ASIMD FP arith, normal, D-form +// ASIMD FP arith, normal, Q-form +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// ASIMD FP arith,pairwise, D-form +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; + +// ASIMD FP compare, D-form +// ASIMD FP compare, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP convert, long +// ASIMD FP convert, narrow +// ASIMD FP convert, other, D-form +// ASIMD FP convert, other, Q-form +// NOTE: Handled by WriteV. 
+ +// ASIMD FP divide, D-form, F32 +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; + +// ASIMD FP divide, Q-form, F32 +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; + +// ASIMD FP max/min, normal, D-form +// ASIMD FP max/min, normal, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// ASIMD FP max/min, pairwise, D-form +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", + "^FMINPv", "^FMINNMPv")>; + +// ASIMD FP max/min, reduce +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", + "^FMINVv", "^FMINNMVv")>; + +// ASIMD FP multiply, D-form, FZ +// ASIMD FP multiply, D-form, no FZ +// ASIMD FP multiply, Q-form, FZ +// ASIMD FP multiply, Q-form, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; + +// ASIMD FP multiply accumulate, Dform, FZ +// ASIMD FP multiply accumulate, Dform, no FZ +// ASIMD FP multiply accumulate, Qform, FZ +// ASIMD FP multiply accumulate, Qform, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; + +// ASIMD FP negate +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; + +// ASIMD FP round, D-form +// ASIMD FP round, Q-form +// NOTE: Handled by WriteV. 
+ +//-- +// 3.14 ASIMD Miscellaneous Instructions +//-- + +// ASIMD bit reverse +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; + +// ASIMD bitwise insert, D-form +// ASIMD bitwise insert, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; + +// ASIMD count, D-form +// ASIMD count, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; + +// ASIMD duplicate, gen reg +// ASIMD duplicate, element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; + +// ASIMD extract +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; + +// ASIMD extract narrow +// ASIMD extract narrow, saturating +// NOTE: Handled by WriteV. + +// ASIMD insert, element to element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; + +// ASIMD move, integer immed +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; + +// ASIMD move, FP immed +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; + +// ASIMD reciprocal estimate, D-form +// ASIMD reciprocal estimate, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", + "^FRSQRTEv", "^URSQRTEv")>; + +// ASIMD reciprocal step, D-form, FZ +// ASIMD reciprocal step, D-form, no FZ +// ASIMD reciprocal step, Q-form, FZ +// ASIMD reciprocal step, Q-form, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; + +// ASIMD reverse +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^REV16v", "^REV32v", "^REV64v")>; + +// ASIMD table lookup, D-form +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; + +// ASIMD transfer, element to word or word +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; + +// ASIMD transfer gen reg to element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex 
"^INSv")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", + "^UZP1v", "^UZP2v")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; + +//-- +// 3.15 ASIMD Load Instructions +//-- + +// ASIMD load, 1 element, multiple, 1 reg, D-form +// ASIMD load, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX2T99Write_4Cyc_LS01], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form +// ASIMD load, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX2T99Write_4Cyc_LS01], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form +// ASIMD load, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX2T99Write_6Cyc_LS01], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S +// ASIMD load, 1 element, one lane, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S +// ASIMD load, 1 element, all lanes, D-form, D +// ASIMD load, 1 element, all lanes, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex 
"^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S +// ASIMD load, 2 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane, B/H +// ASIMD load, 2 element, one lane, S +// ASIMD load, 2 element, one lane, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S +// ASIMD load, 2 element, all lanes, D-form, D +// ASIMD load, 2 element, all lanes, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lone, B/H +// ASIMD load, 3 element, one lane, S +// ASIMD load, 3 element, one lane, D +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], + (instregex "^LD3i(8|16|32|64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S +// ASIMD load, 3 element, all lanes, D-form, D +// ASIMD load, 3 element, all lanes, Q-form, B/H/S +// ASIMD load, 3 element, all lanes, Q-form, D +def : 
InstRW<[THX2T99Write_7Cyc_LS01_F01], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H +// ASIMD load, 4 element, one lane, S +// ASIMD load, 4 element, one lane, D +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], + (instregex "^LD4i(8|16|32|64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S +// ASIMD load, 4 element, all lanes, D-form, D +// ASIMD load, 4 element, all lanes, Q-form, B/H/S +// ASIMD load, 4 element, all lanes, Q-form, D +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//-- +// 3.16 ASIMD Store Instructions +//-- + +// ASIMD store, 1 element, multiple, 1 reg, D-form +// ASIMD store, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form +// ASIMD store, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, D-form +// ASIMD store, 
1 element, multiple, 3 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S +// ASIMD store, 1 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S +// ASIMD store, 2 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H +// ASIMD store, 3 element, one lane, S +// ASIMD store, 3 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex 
"^ST3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST3i(8|16|32|64)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H +// ASIMD store, 4 element, one lane, S +// ASIMD store, 4 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST4i(8|16|32|64)_POST$")>; + +//-- +// 3.17 Cryptography Extensions +//-- + +// Crypto AES ops +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>; + +// Crypto polynomial (64x64) multiply long +def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; + +// Crypto SHA1 xor ops +// Crypto SHA1 schedule acceleration ops +// Crypto SHA256 schedule acceleration op (1 u-op) +// Crypto SHA256 schedule acceleration op (2 u-ops) +// Crypto SHA256 hash acceleration ops +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>; + +//-- +// 3.18 CRC +//-- + +// CRC checksum ops +def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>; + +} // SchedModel = ThunderX2T99Model Index: llvm/trunk/lib/Target/AArch64/AArch64SchedVulcan.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64SchedVulcan.td +++ llvm/trunk/lib/Target/AArch64/AArch64SchedVulcan.td @@ -1,852 +0,0 @@ -//=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// 1. Introduction -// -// This file defines the machine model for Broadcom Vulcan to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// 2. Pipeline Description. - -def VulcanModel : SchedMachineModel { - let IssueWidth = 4; // 4 micro-ops dispatched at a time. - let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 12; // Extra cycles for mispredicted branch. - // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 32; - let PostRAScheduler = 1; // Using PostRA sched. - let CompleteModel = 1; -} - -// Define the issue ports. - -// Port 0: ALU, FP/SIMD. -def VulcanP0 : ProcResource<1>; - -// Port 1: ALU, FP/SIMD, integer mul/div. -def VulcanP1 : ProcResource<1>; - -// Port 2: ALU, Branch. -def VulcanP2 : ProcResource<1>; - -// Port 3: Store data. -def VulcanP3 : ProcResource<1>; - -// Port 4: Load/store. -def VulcanP4 : ProcResource<1>; - -// Port 5: Load/store. -def VulcanP5 : ProcResource<1>; - -let SchedModel = VulcanModel in { - -// Define groups for the functional units on each issue port. Each group -// created will be used by a WriteRes later on. -// -// NOTE: Some groups only contain one member. This is a way to create names for -// the various functional units that share a single issue port. For example, -// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on port 1. - -// Integer divide and multiply micro-ops only on port 1. -def VulcanI1 : ProcResGroup<[VulcanP1]>; - -// Branch micro-ops only on port 2. -def VulcanI2 : ProcResGroup<[VulcanP2]>; - -// ALU micro-ops on ports 0, 1, and 2. 
-def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>; - -// Crypto FP/SIMD micro-ops only on port 1. -def VulcanF1 : ProcResGroup<[VulcanP1]>; - -// FP/SIMD micro-ops on ports 0 and 1. -def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>; - -// Store data micro-ops only on port 3. -def VulcanSD : ProcResGroup<[VulcanP3]>; - -// Load/store micro-ops on ports 4 and 5. -def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>; - -// 60 entry unified scheduler. -def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2, - VulcanP3, VulcanP4, VulcanP5]> { - let BufferSize=60; -} - -// Define commonly used write types for InstRW specializations. -// All definitions follow the format: VulcanWrite_Cyc_. - -// 3 cycles on I1. -def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 3; } - -// 4 cycles on I1. -def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 4; } - -// 1 cycle on I0, I1, or I2. -def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> { let Latency = 1; } - -// 5 cycles on F1. -def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 5; } - -// 7 cycles on F1. -def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 7; } - -// 4 cycles on F0 or F1. -def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 4; } - -// 5 cycles on F0 or F1. -def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 5; } - -// 6 cycles on F0 or F1. -def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 6; } - -// 7 cycles on F0 or F1. -def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 7; } - -// 8 cycles on F0 or F1. -def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 8; } - -// 16 cycles on F0 or F1. -def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> { - let Latency = 16; - let ResourceCycles = [8]; -} - -// 23 cycles on F0 or F1. 
-def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> { - let Latency = 23; - let ResourceCycles = [11]; -} - -// 1 cycles on LS0 or LS1. -def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 1; } - -// 4 cycles on LS0 or LS1. -def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 4; } - -// 5 cycles on LS0 or LS1. -def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 5; } - -// 6 cycles on LS0 or LS1. -def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 6; } - -// 5 cycles on LS0 or LS1 and I0, I1, or I2. -def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def VulcanWrite_6Cyc_LS01_I012_I012 : - SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> { - let Latency = 6; - let NumMicroOps = 3; -} - -// 1 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { - let Latency = 1; - let NumMicroOps = 2; -} - -// 5 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { - let Latency = 5; - let NumMicroOps = 2; -} - -// 6 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { - let Latency = 6; - let NumMicroOps = 2; -} - -// 7 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { - let Latency = 7; - let NumMicroOps = 2; -} - -// 8 cycles on LS0 or LS1 and F0 or F1. -def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> { - let Latency = 8; - let NumMicroOps = 2; -} - -// Define commonly used read types. - -// No forwarding is provided for these types. 
-def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -} - - -//===----------------------------------------------------------------------===// -// 3. Instruction Tables. - -let SchedModel = VulcanModel in { - -//--- -// 3.1 Branch Instructions -//--- - -// Branch, immed -// Branch and link, immed -// Compare and branch -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Unsupported = 1; } - -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes { let Latency = 1; } - -//--- -// 3.2 Arithmetic and Logical Instructions -// 3.3 Move and Shift Instructions -//--- - -// ALU, basic -// Conditional compare -// Conditional select -// Address generation -def : WriteRes { let Latency = 1; } -def : InstRW<[WriteI], (instrs COPY)>; - -// ALU, extend and/or shift -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; -} - -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; -} - -// Move immed -def : WriteRes { let Latency = 1; } - -// Variable shift -def : WriteRes { let Latency = 1; } - -//--- -// 3.4 Divide and Multiply Instructions -//--- - -// Divide, W-form -// Latency range of 13-23. Take the average. -def : WriteRes { - let Latency = 18; - let ResourceCycles = [18]; -} - -// Divide, X-form -// Latency range of 13-39. Take the average. -def : WriteRes { - let Latency = 26; - let ResourceCycles = [26]; -} - -// Multiply accumulate, W-form -def : WriteRes { let Latency = 5; } - -// Multiply accumulate, X-form -def : WriteRes { let Latency = 5; } - -// Bitfield extract, two reg -def : WriteRes { let Latency = 1; } - -// Bitfield move, basic -// Bitfield move, insert -// NOTE: Handled by WriteIS. 
- -// Count leading -def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; - -// Reverse bits/bytes -// NOTE: Handled by WriteI. - -//--- -// 3.6 Load Instructions -// 3.10 FP Load Instructions -//--- - -// Load register, literal -// Load register, unscaled immed -// Load register, immed unprivileged -// Load register, unsigned immed -def : WriteRes { let Latency = 4; } - -// Load register, immed post-index -// NOTE: Handled by WriteLD, WriteI. -// Load register, immed pre-index -// NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes { let Latency = 1; } - -// Load register offset, basic -// Load register, register offset, scale by 4/8 -// Load register, register offset, scale by 2 -// Load register offset, extend -// Load register, register offset, extend, scale by 4/8 -// Load register, register offset, extend, scale by 2 -def VulcanWriteLDIdx : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def VulcanReadAdrBase : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handling by WriteLD. -def : WriteRes { - let Latency = 5; -} - -// Load pair, immed pre-index, normal -// Load pair, immed pre-index, signed words -// Load pair, immed post-index, normal -// Load pair, immed post-index, signed words -// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. 
- -//-- -// 3.7 Store Instructions -// 3.11 FP Store Instructions -//-- - -// Store register, unscaled immed -// Store register, immed unprivileged -// Store register, unsigned immed -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Store register, immed post-index -// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase - -// Store register, immed pre-index -// NOTE: Handled by WriteAdr, WriteST - -// Store register, register offset, basic -// Store register, register offset, scaled by 4/8 -// Store register, register offset, scaled by 2 -// Store register, register offset, extend -// Store register, register offset, extend, scale by 4/8 -// Store register, register offset, extend, scale by 1 -def : WriteRes { - let Latency = 1; - let NumMicroOps = 3; -} - -// Store pair, immed offset, W-form -// Store pair, immed offset, X-form -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} - -// Store pair, immed post-index, W-form -// Store pair, immed post-index, X-form -// Store pair, immed pre-index, W-form -// Store pair, immed pre-index, X-form -// NOTE: Handled by WriteAdr, WriteSTP. 
- -//--- -// 3.8 FP Data Processing Instructions -//--- - -// FP absolute value -// FP min/max -// FP negate -def : WriteRes { let Latency = 5; } - -// FP arithmetic -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADD", "^FSUB")>; - -// FP compare -def : WriteRes { let Latency = 5; } - -// FP divide, S-form -// FP square root, S-form -def : WriteRes { - let Latency = 16; - let ResourceCycles = [8]; -} - -// FP divide, D-form -// FP square root, D-form -def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; - -// FP multiply -// FP multiply accumulate -def : WriteRes { let Latency = 6; } - -// FP round to integral -def : InstRW<[VulcanWrite_7Cyc_F01], - (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; - -// FP select -def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>; - -//--- -// 3.9 FP Miscellaneous Instructions -//--- - -// FP convert, from vec to vec reg -// FP convert, from gen to vec reg -// FP convert, from vec to gen reg -def : WriteRes { let Latency = 7; } - -// FP move, immed -// FP move, register -def : WriteRes { let Latency = 4; } - -// FP transfer, from gen to vec reg -// FP transfer, from vec to gen reg -def : WriteRes { let Latency = 4; } -def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; - -//--- -// 3.12 ASIMD Integer Instructions -//--- - -// ASIMD absolute diff, D-form -// ASIMD absolute diff, Q-form -// ASIMD absolute diff accum, D-form -// ASIMD absolute diff accum, Q-form -// ASIMD absolute diff accum long -// ASIMD absolute diff long -// ASIMD arith, basic -// ASIMD arith, complex -// ASIMD compare -// ASIMD logical (AND, BIC, EOR) -// ASIMD max/min, basic -// ASIMD max/min, reduce, 4H/4S -// ASIMD max/min, reduce, 8B/8H -// ASIMD max/min, reduce, 16B -// ASIMD multiply, D-form -// ASIMD multiply, Q-form -// ASIMD multiply accumulate long -// ASIMD multiply accumulate saturating long -// ASIMD multiply long -// ASIMD pairwise add and accumulate -// ASIMD shift accumulate -// ASIMD shift by immed, 
basic -// ASIMD shift by immed and insert, basic, D-form -// ASIMD shift by immed and insert, basic, Q-form -// ASIMD shift by immed, complex -// ASIMD shift by register, basic, D-form -// ASIMD shift by register, basic, Q-form -// ASIMD shift by register, complex, D-form -// ASIMD shift by register, complex, Q-form -def : WriteRes { let Latency = 7; } - -// ASIMD arith, reduce, 4H/4S -// ASIMD arith, reduce, 8B/8H -// ASIMD arith, reduce, 16B -def : InstRW<[VulcanWrite_5Cyc_F01], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; - -// ASIMD logical (MOV, MVN, ORN, ORR) -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; - -// ASIMD polynomial (8x8) multiply long -def : InstRW<[VulcanWrite_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; - -//--- -// 3.13 ASIMD Floating-point Instructions -//--- - -// ASIMD FP absolute value -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FABSv")>; - -// ASIMD FP arith, normal, D-form -// ASIMD FP arith, normal, Q-form -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; - -// ASIMD FP arith,pairwise, D-form -// ASIMD FP arith, pairwise, Q-form -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADDPv")>; - -// ASIMD FP compare, D-form -// ASIMD FP compare, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; - -// ASIMD FP convert, long -// ASIMD FP convert, narrow -// ASIMD FP convert, other, D-form -// ASIMD FP convert, other, Q-form -// NOTE: Handled by WriteV. 
- -// ASIMD FP divide, D-form, F32 -def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv2f32)>; - -// ASIMD FP divide, Q-form, F32 -def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv4f32)>; - -// ASIMD FP divide, Q-form, F64 -def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVv2f64)>; - -// ASIMD FP max/min, normal, D-form -// ASIMD FP max/min, normal, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", - "^FMINv", "^FMINNMv")>; - -// ASIMD FP max/min, pairwise, D-form -// ASIMD FP max/min, pairwise, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", - "^FMINPv", "^FMINNMPv")>; - -// ASIMD FP max/min, reduce -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", - "^FMINVv", "^FMINNMVv")>; - -// ASIMD FP multiply, D-form, FZ -// ASIMD FP multiply, D-form, no FZ -// ASIMD FP multiply, Q-form, FZ -// ASIMD FP multiply, Q-form, no FZ -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; - -// ASIMD FP multiply accumulate, Dform, FZ -// ASIMD FP multiply accumulate, Dform, no FZ -// ASIMD FP multiply accumulate, Qform, FZ -// ASIMD FP multiply accumulate, Qform, no FZ -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; - -// ASIMD FP negate -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>; - -// ASIMD FP round, D-form -// ASIMD FP round, Q-form -// NOTE: Handled by WriteV. 
- -//-- -// 3.14 ASIMD Miscellaneous Instructions -//-- - -// ASIMD bit reverse -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^RBITv")>; - -// ASIMD bitwise insert, D-form -// ASIMD bitwise insert, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; - -// ASIMD count, D-form -// ASIMD count, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; - -// ASIMD duplicate, gen reg -// ASIMD duplicate, element -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^DUPv")>; - -// ASIMD extract -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^EXTv")>; - -// ASIMD extract narrow -// ASIMD extract narrow, saturating -// NOTE: Handled by WriteV. - -// ASIMD insert, element to element -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>; - -// ASIMD move, integer immed -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; - -// ASIMD move, FP immed -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMOVv")>; - -// ASIMD reciprocal estimate, D-form -// ASIMD reciprocal estimate, Q-form -def : InstRW<[VulcanWrite_5Cyc_F01], - (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; - -// ASIMD reciprocal step, D-form, FZ -// ASIMD reciprocal step, D-form, no FZ -// ASIMD reciprocal step, Q-form, FZ -// ASIMD reciprocal step, Q-form, no FZ -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; - -// ASIMD reverse -def : InstRW<[VulcanWrite_5Cyc_F01], - (instregex "^REV16v", "^REV32v", "^REV64v")>; - -// ASIMD table lookup, D-form -// ASIMD table lookup, Q-form -def : InstRW<[VulcanWrite_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; - -// ASIMD transfer, element to word or word -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^UMOVv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; - -// ASIMD transfer gen reg to element -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>; - -// ASIMD 
transpose -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; - -// ASIMD unzip/zip -def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; - -//-- -// 3.15 ASIMD Load Instructions -//-- - -// ASIMD load, 1 element, multiple, 1 reg, D-form -// ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[VulcanWrite_4Cyc_LS01], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 2 reg, D-form -// ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[VulcanWrite_4Cyc_LS01], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 3 reg, D-form -// ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[VulcanWrite_5Cyc_LS01], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, multiple, 4 reg, D-form -// ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[VulcanWrite_6Cyc_LS01], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 1 element, one lane, B/H/S -// ASIMD load, 1 element, one lane, D -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD1i(8|16|32|64)_POST$")>; - -// ASIMD load, 1 element, all lanes, D-form, B/H/S -// ASIMD load, 1 element, all lanes, D-form, D -// ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : 
InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, multiple, D-form, B/H/S -// ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, one lane, B/H -// ASIMD load, 2 element, one lane, S -// ASIMD load, 2 element, one lane, D -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD2i(8|16|32|64)_POST$")>; - -// ASIMD load, 2 element, all lanes, D-form, B/H/S -// ASIMD load, 2 element, all lanes, D-form, D -// ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[VulcanWrite_5Cyc_LS01_F01], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, multiple, D-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_8Cyc_LS01_F01], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, one lone, B/H -// ASIMD load, 3 element, one lane, S -// ASIMD load, 3 element, one lane, D -def : InstRW<[VulcanWrite_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr], - (instregex "^LD3i(8|16|32|64)_POST$")>; - -// ASIMD load, 3 element, all lanes, D-form, B/H/S -// ASIMD load, 3 element, all lanes, D-form, D -// ASIMD load, 3 element, all lanes, Q-form, B/H/S -// ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[VulcanWrite_7Cyc_LS01_F01], - (instregex 
"^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, multiple, D-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_8Cyc_LS01_F01], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, one lane, B/H -// ASIMD load, 4 element, one lane, S -// ASIMD load, 4 element, one lane, D -def : InstRW<[VulcanWrite_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], - (instregex "^LD4i(8|16|32|64)_POST$")>; - -// ASIMD load, 4 element, all lanes, D-form, B/H/S -// ASIMD load, 4 element, all lanes, D-form, D -// ASIMD load, 4 element, all lanes, Q-form, B/H/S -// ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[VulcanWrite_6Cyc_LS01_F01], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//-- -// 3.16 ASIMD Store Instructions -//-- - -// ASIMD store, 1 element, multiple, 1 reg, D-form -// ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 2 reg, D-form -// ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 3 reg, D-form -// ASIMD store, 1 element, multiple, 3 reg, Q-form -def : 
InstRW<[VulcanWrite_1Cyc_LS01], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 4 reg, D-form -// ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[VulcanWrite_1Cyc_LS01], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, one lane, B/H/S -// ASIMD store, 1 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], - (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST1i(8|16|32|64)_POST$")>; - -// ASIMD store, 2 element, multiple, D-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 2 element, one lane, B/H/S -// ASIMD store, 2 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], - (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST2i(8|16|32|64)_POST$")>; - -// ASIMD store, 3 element, multiple, D-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 3 element, one lane, B/H -// ASIMD store, 3 element, one lane, S -// ASIMD store, 3 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, 
WriteAdr], - (instregex "^ST3i(8|16|32|64)_POST$")>; - -// ASIMD store, 4 element, multiple, D-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 4 element, one lane, B/H -// ASIMD store, 4 element, one lane, S -// ASIMD store, 4 element, one lane, D -def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr], - (instregex "^ST4i(8|16|32|64)_POST$")>; - -//-- -// 3.17 Cryptography Extensions -//-- - -// Crypto AES ops -def : InstRW<[VulcanWrite_5Cyc_F1], (instregex "^AES")>; - -// Crypto polynomial (64x64) multiply long -def : InstRW<[VulcanWrite_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; - -// Crypto SHA1 xor ops -// Crypto SHA1 schedule acceleration ops -// Crypto SHA256 schedule acceleration op (1 u-op) -// Crypto SHA256 schedule acceleration op (2 u-ops) -// Crypto SHA256 hash acceleration ops -def : InstRW<[VulcanWrite_7Cyc_F1], (instregex "^SHA")>; - -//-- -// 3.18 CRC -//-- - -// CRC checksum ops -def : InstRW<[VulcanWrite_4Cyc_I1], (instregex "^CRC32")>; - -} // SchedModel = VulcanModel Index: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h @@ -45,7 +45,7 @@ ExynosM1, Falkor, Kryo, - Vulcan, + ThunderX2T99, ThunderX, ThunderXT81, ThunderXT83, Index: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp @@ -81,16 +81,22 @@ MinPrefetchStride = 1024; 
MaxPrefetchIterationsAhead = 11; break; - case Vulcan: + case ThunderX2T99: + CacheLineSize = 64; + PrefFunctionAlignment = 3; + PrefLoopAlignment = 2; MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 4; break; case ThunderX: case ThunderXT88: case ThunderXT81: case ThunderXT83: CacheLineSize = 128; - PrefFunctionAlignment = 4; - PrefLoopAlignment = 4; + PrefFunctionAlignment = 3; + PrefLoopAlignment = 2; break; case CortexA35: break; case CortexA53: break; Index: llvm/trunk/test/CodeGen/AArch64/cpus.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/cpus.ll +++ llvm/trunk/test/CodeGen/AArch64/cpus.ll @@ -12,7 +12,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m3 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=falkor 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=vulcan 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx2t99 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; CHECK-NOT: {{.*}} is not a recognized processor for this target Index: llvm/trunk/test/CodeGen/AArch64/machine-combiner-madd.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/machine-combiner-madd.ll +++ llvm/trunk/test/CodeGen/AArch64/machine-combiner-madd.ll @@ -6,7 +6,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m1 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m2 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=kryo < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=vulcan < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s | FileCheck %s ; Make sure that inst-combine fuses 
the multiply add in the addressing mode of ; the load. Index: llvm/trunk/test/CodeGen/AArch64/remat.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/remat.ll +++ llvm/trunk/test/CodeGen/AArch64/remat.ll @@ -8,7 +8,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m3 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=falkor -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx2t99 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s %X = type { i64, i64, i64 } Index: llvm/trunk/unittests/Support/TargetParserTest.cpp =================================================================== --- llvm/trunk/unittests/Support/TargetParserTest.cpp +++ llvm/trunk/unittests/Support/TargetParserTest.cpp @@ -643,8 +643,9 @@ "kryo", "armv8-a", "crypto-neon-fp-armv8", AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD, "8-A")); EXPECT_TRUE(testAArch64CPU( - "vulcan", "armv8.1-a", "crypto-neon-fp-armv8", - AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD, "8.1-A")); + "thunderx2t99", "armv8.1-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | + AArch64::AEK_SIMD, "8.1-A")); EXPECT_TRUE(testAArch64CPU( "thunderx", "armv8-a", "crypto-neon-fp-armv8", AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD | @@ -700,7 +701,7 @@ EXPECT_FALSE(testAArch64Extension("cyclone", 0, "ras")); EXPECT_FALSE(testAArch64Extension("exynos-m1", 0, "ras")); EXPECT_FALSE(testAArch64Extension("kryo", 0, "ras")); - EXPECT_FALSE(testAArch64Extension("vulcan", 0, "ras")); + EXPECT_FALSE(testAArch64Extension("thunderx2t99", 0, "ras")); EXPECT_FALSE(testAArch64Extension("thunderx", 0, "lse")); 
EXPECT_FALSE(testAArch64Extension("thunderxt81", 0, "lse")); EXPECT_FALSE(testAArch64Extension("thunderxt83", 0, "lse"));