Index: include/llvm/Support/AArch64TargetParser.def
===================================================================
--- include/llvm/Support/AArch64TargetParser.def
+++ include/llvm/Support/AArch64TargetParser.def
@@ -75,6 +75,15 @@
                     (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
 AARCH64_CPU_NAME("vulcan", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
                     (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
+
+AARCH64_CPU_NAME("thunderx", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                    (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | AArch64::AEK_FP | AArch64::AEK_PROFILE))
+AARCH64_CPU_NAME("thunderxt88", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                    (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | AArch64::AEK_FP | AArch64::AEK_PROFILE))
+AARCH64_CPU_NAME("thunderxt81", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                    (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | AArch64::AEK_FP | AArch64::AEK_PROFILE))
+AARCH64_CPU_NAME("thunderxt83", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                    (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE | AArch64::AEK_FP | AArch64::AEK_PROFILE))
 // Invalid CPU
 AARCH64_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
 #undef AARCH64_CPU_NAME
Index: lib/Support/TargetParser.cpp
===================================================================
--- lib/Support/TargetParser.cpp
+++ lib/Support/TargetParser.cpp
@@ -448,6 +448,8 @@
     Features.push_back("+spe");
   if (Extensions & AArch64::AEK_RAS)
     Features.push_back("+ras");
+  if (Extensions & AArch64::AEK_LSE)
+    Features.push_back("+lse");

   return true;
 }
Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -111,7 +111,6 @@
 def FeatureUseRSqrt : SubtargetFeature<
     "use-reciprocal-square-root", "UseRSqrt", "true",
     "Use the reciprocal square root approximation">;
-
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
@@ -153,6 +152,7 @@
 include "AArch64SchedFalkor.td"
 include "AArch64SchedKryo.td"
 include "AArch64SchedM1.td"
+include "AArch64SchedThunderX.td"
 include "AArch64SchedVulcan.td"

 def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
@@ -288,6 +288,55 @@
                                    FeaturePredictableSelectIsExpensive,
                                    HasV8_1aOps]>;
+def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+                                    "Cavium ThunderX processors", [
+                                    FeatureCRC,
+                                    FeatureCrypto,
+                                    FeatureFPARMv8,
+                                    FeatureLSE,
+                                    FeaturePerfMon,
+                                    FeaturePostRAScheduler,
+                                    FeaturePredictableSelectIsExpensive,
+                                    FeatureNEON]>;
+
+def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+                                       "ThunderXT88",
+                                       "Cavium ThunderX processors", [
+                                       FeatureCRC,
+                                       FeatureCrypto,
+                                       FeatureFPARMv8,
+                                       FeatureLSE,
+                                       FeaturePerfMon,
+                                       FeaturePostRAScheduler,
+                                       FeaturePredictableSelectIsExpensive,
+                                       FeatureNEON]>;
+
+def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+                                       "ThunderXT81",
+                                       "Cavium ThunderX processors", [
+                                       FeatureCRC,
+                                       FeatureCrypto,
+                                       FeatureFPARMv8,
+                                       FeatureLSE,
+                                       FeaturePerfMon,
+                                       FeaturePostRAScheduler,
+                                       FeaturePredictableSelectIsExpensive,
+                                       FeatureNEON,
+                                       HasV8_1aOps]>;
+
+def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+                                       "ThunderXT83",
+                                       "Cavium ThunderX processors", [
+                                       FeatureCRC,
+                                       FeatureCrypto,
+                                       FeatureFPARMv8,
+                                       FeatureLSE,
+                                       FeaturePerfMon,
+                                       FeaturePostRAScheduler,
+                                       FeaturePredictableSelectIsExpensive,
+                                       FeatureNEON,
+                                       HasV8_1aOps]>;
+
 def : ProcessorModel<"generic", NoSchedModel, [
                      FeatureCRC,
                      FeatureFPARMv8,
@@ -310,6 +359,11 @@
 def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
 def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
 def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>;
+// Cavium ThunderX/ThunderX T8X Processors
+def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;

 //===----------------------------------------------------------------------===//
 // Assembly parser
Index: lib/Target/AArch64/AArch64SchedThunderX.td
===================================================================
--- lib/Target/AArch64/AArch64SchedThunderX.td
+++ lib/Target/AArch64/AArch64SchedThunderX.td
@@ -0,0 +1,390 @@
+//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Sched Defs -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Cavium ThunderX T8X
+// (T88, T81, T83) processors.
+// Loosely based on the Cortex-A53 model, which is somewhat similar.
+//
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Cavium ThunderX T8X scheduling machine model.
+def ThunderXT8XModel : SchedMachineModel {
+  let IssueWidth = 2;         // 2 micro-ops dispatched per cycle.
+  let MicroOpBufferSize = 0;  // ThunderX T88/T81/T83 are in-order.
+  let LoadLatency = 3;        // Optimistic load latency.
+  let MispredictPenalty = 8;  // Branch mispredict penalty.
+  let CompleteModel = 1;
+}
+
+// Modeling each pipeline with BufferSize == 0 since the ThunderX T8X is in-order.
+def THXT8XUnitALU   : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def THXT8XUnitMAC   : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def THXT8XUnitDiv   : ProcResource<1> { let BufferSize = 0; } // Int Division
+def THXT8XUnitLdSt  : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def THXT8XUnitBr    : ProcResource<1> { let BufferSize = 0; } // Branch
+def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types mapping the ProcResources and
+// latencies.
+
+let SchedModel = ThunderXT8XModel in {
+
+// ALU
+def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }
+
+// MAC
+def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
+  let Latency = 4;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
+  let Latency = 4;
+  let ResourceCycles = [1];
+}
+
+// Div
+def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
+  let Latency = 12;
+  let ResourceCycles = [6];
+}
+
+def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
+  let Latency = 14;
+  let ResourceCycles = [8];
+}
+
+// Load
+def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }
+
+// Vector Load
+def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
+  let Latency = 8;
+  let ResourceCycles = [3];
+}
+
+def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 6;
+  let ResourceCycles = [1];
+}
+
+def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 11;
+  let ResourceCycles = [7];
+}
+
+def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 12;
+  let ResourceCycles = [8];
+}
+
+def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 13;
+  let ResourceCycles = [9];
+}
+
+def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 13;
+  let ResourceCycles = [9];
+}
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; }
+
+// Vector Store
+def : WriteRes<WriteVST, [THXT8XUnitLdSt]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 10;
+  let ResourceCycles = [9];
+}
+
+def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+  let Latency = 1;
+  let ResourceCycles = [10];
+}
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteBrReg, [THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteSys, [THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteBarrier, [THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteHint, [THXT8XUnitBr]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+// FP ALU
+def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
+  let Latency = 22;
+  let ResourceCycles = [19];
+}
+
+def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }
+
+def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+  let Latency = 12;
+  let ResourceCycles = [9];
+}
+
+def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+  let Latency = 22;
+  let ResourceCycles = [19];
+}
+
+def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+  let Latency = 17;
+  let ResourceCycles = [14];
+}
+
+def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+  let Latency = 31;
+  let ResourceCycles = [28];
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// No forwarding for these reads.
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// FIXME: This needs more targeted benchmarking.
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+//       operands are needed one cycle later if and only if they are to be
+//       shifted. Otherwise, they too are needed two cycles later. This same
+//       ReadAdvance applies to Extended registers as well, even though there is
+//       a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
+                             WriteISReg, WriteIEReg, WriteIS,
+                             WriteID32, WriteID64,
+                             WriteIM32, WriteIM64]>;
+def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
+                                             WriteISReg, WriteIEReg, WriteIS,
+                                             WriteID32, WriteID64,
+                                             WriteIM32, WriteIM64]>;
+def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
+                                                WriteISReg, WriteIEReg, WriteIS,
+                                                WriteID32, WriteID64,
+                                                WriteIM32, WriteIM64]>;
+def THXT8XReadISReg : SchedReadVariant<[
+        SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
+        SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, THXT8XReadISReg>;
+
+def THXT8XReadIEReg : SchedReadVariant<[
+        SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
+        SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+//       Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm, WriteI,
+                              WriteISReg, WriteIEReg, WriteIS,
+                              WriteID32, WriteID64,
+                              WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
+                               WriteISReg, WriteIEReg, WriteIS,
+                               WriteID32, WriteID64,
+                               WriteIM32, WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
+                              WriteISReg, WriteIEReg, WriteIS,
+                              WriteID32, WriteID64,
+                              WriteIM32, WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRW.
+
+//---
+// Branch
+//---
+def : InstRW<[THXT8XWriteBR], (instregex "^B")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^BL")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BR")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BLR")>;
+
+//---
+// Ret
+//---
+def : InstRW<[THXT8XWriteRET], (instregex "^RET")>;
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
"LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +} Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -45,7 +45,11 @@ ExynosM1, Falkor, Kryo, - Vulcan + Vulcan, + ThunderX, + ThunderXT88, + ThunderXT81, + ThunderXT83 }; protected: Index: lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- 
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -84,6 +84,28 @@
   case Vulcan:
     MaxInterleaveFactor = 4;
     break;
+  case ThunderX:
+    CacheLineSize = 128;
+    MaxPrefetchIterationsAhead = 8;
+    HasLSE = true;
+    PrefFunctionAlignment = 4;
+    PrefLoopAlignment = 4;
+    break;
+  case ThunderXT88:
+    CacheLineSize = 128;
+    HasLSE = true;
+    PrefFunctionAlignment = 4;
+    PrefLoopAlignment = 4;
+    break;
+  case ThunderXT81:
+  case ThunderXT83:
+    CacheLineSize = 128;
+    MaxPrefetchIterationsAhead = 16;
+    HasLSE = true;
+    HasV8_1aOps = true;
+    PrefFunctionAlignment = 4;
+    PrefLoopAlignment = 4;
+    break;
   case CortexA35: break;
   case CortexA53: break;
   case CortexA72: break;
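Note (not part of the diff): for a quick sanity check of the "+lse" wiring added in the TargetParser.cpp hunk, the standalone sketch below feeds a ThunderX-style extension mask through AArch64::getExtensionFeatures, the routine this patch touches. It is a minimal sketch that assumes the TargetParser API of this revision, i.e. bool llvm::AArch64::getExtensionFeatures(unsigned, std::vector<StringRef>&); the file name check_lse.cpp is made up for illustration.

// check_lse.cpp -- hypothetical reviewer-side check, not included in the patch.
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  // Extension mask mirroring the new "thunderxt88" entry in
  // AArch64TargetParser.def above.
  unsigned Ext = llvm::AArch64::AEK_SIMD | llvm::AArch64::AEK_CRC |
                 llvm::AArch64::AEK_CRYPTO | llvm::AArch64::AEK_LSE |
                 llvm::AArch64::AEK_FP | llvm::AArch64::AEK_PROFILE;

  std::vector<llvm::StringRef> Features;
  llvm::AArch64::getExtensionFeatures(Ext, Features);

  // With the TargetParser.cpp change, "+lse" should now be emitted here
  // alongside the other feature strings.
  for (llvm::StringRef F : Features)
    llvm::outs() << F << "\n";
  return 0;
}

A simpler end-to-end check is confirming that llc no longer warns "is not a recognized processor for this target" for -mcpu=thunderx, -mcpu=thunderxt88, -mcpu=thunderxt81, and -mcpu=thunderxt83 on an aarch64 triple.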