Index: include/llvm/Support/AArch64TargetParser.def =================================================================== --- include/llvm/Support/AArch64TargetParser.def +++ include/llvm/Support/AArch64TargetParser.def @@ -70,6 +70,9 @@ (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) AARCH64_CPU_NAME("vulcan", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) +AARCH64_CPU_NAME("thunderx", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + // Invalid CPU AARCH64_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, AArch64::AEK_INVALID) #undef AARCH64_CPU_NAME Index: lib/Target/AArch64/AArch64.td =================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -146,6 +146,7 @@ include "AArch64SchedM1.td" include "AArch64SchedKryo.td" include "AArch64SchedVulcan.td" +include "AArch64SchedThunderX.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -254,6 +255,18 @@ FeaturePredictableSelectIsExpensive, HasV8_1aOps]>; +def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "ThunderX ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureMergeNarrowLd, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + HasV8_1aOps]>; + + def : ProcessorModel<"generic", NoSchedModel, [ FeatureCRC, FeatureFPARMv8, @@ -274,6 +287,7 @@ def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM1]>; def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>; +def : ProcessorModel<"thunderx", ThunderXModel, [ProcThunderX]>; //===----------------------------------------------------------------------===// // Assembly parser Index: lib/Target/AArch64/AArch64SchedThunderX.td =================================================================== --- /dev/null +++ lib/Target/AArch64/AArch64SchedThunderX.td @@ -0,0 +1,289 @@ +//==- AArch64SchedThunderX.td - ThunderX Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Cavium ThunderX processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedModel.h for details. + +// ThunderX machine model for scheduling and other instruction cost heuristics. +def ThunderXModel : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since ThunderX is in-order. + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 7; + let CompleteModel = 1; + let LoopMicroOpBufferSize = 8; +} + + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. 
def ThunderXUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
def ThunderXUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
def ThunderXUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
def ThunderXUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
def ThunderXUnitB : ProcResource<1> { let BufferSize = 0; } // Branch
def ThunderXUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
def ThunderXUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt


//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which both map the ProcResources and
// set the latency.

let SchedModel = ThunderXModel in {

// ALU - Despite having a full latency of 4, most of the ALU instructions can
//       forward a cycle earlier and then two cycles earlier in the case of a
//       shift-only instruction. These latencies will be incorrect when the
//       result cannot be forwarded, but modeling isn't rocket surgery.
def : WriteRes<WriteImm, [ThunderXUnitALU]> { let Latency = 3; }
def : WriteRes<WriteI, [ThunderXUnitALU]> { let Latency = 3; }
def : WriteRes<WriteISReg, [ThunderXUnitALU]> { let Latency = 3; }
def : WriteRes<WriteIEReg, [ThunderXUnitALU]> { let Latency = 3; }
def : WriteRes<WriteIS, [ThunderXUnitALU]> { let Latency = 2; }
def : WriteRes<WriteExtr, [ThunderXUnitALU]> { let Latency = 3; }

// MAC
def : WriteRes<WriteIM32, [ThunderXUnitMAC]> { let Latency = 4; }
def : WriteRes<WriteIM64, [ThunderXUnitMAC]> { let Latency = 4; }

// Div
def : WriteRes<WriteID32, [ThunderXUnitDiv]> { let Latency = 15; }
def : WriteRes<WriteID64, [ThunderXUnitDiv]> { let Latency = 15; }

// Load
def : WriteRes<WriteLD, [ThunderXUnitLdSt]> { let Latency = 3; }
def : WriteRes<WriteLDIdx, [ThunderXUnitLdSt]> { let Latency = 3; }
def : WriteRes<WriteLDHi, [ThunderXUnitLdSt]> { let Latency = 3; }

// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd
//               below, choosing the median of 3 which makes the latency 6.
//               May model this more carefully in the future. The remaining
//               ThunderXWriteVLD# types represent the 1-5 cycle issues explicitly.
def : WriteRes<WriteVLD, [ThunderXUnitLdSt]> { let Latency = 6;
                                               let ResourceCycles = [3]; }
def ThunderXWriteVLD1 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 4; }
def ThunderXWriteVLD2 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 12;
                                                            let ResourceCycles = [8]; }
def ThunderXWriteVLD3 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 14;
                                                            let ResourceCycles = [10]; }
def ThunderXWriteVLD4 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 16;
                                                            let ResourceCycles = [12]; }
def ThunderXWriteVLD5 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 16;
                                                            let ResourceCycles = [12]; }

// Pre/Post Indexing - Performed as part of address generation which is already
//                     accounted for in the WriteST* latencies below
def : WriteRes<WriteAdr, []> { let Latency = 0; }

// Store
def : WriteRes<WriteST, [ThunderXUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTP, [ThunderXUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTIdx, [ThunderXUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTX, [ThunderXUnitLdSt]> { let Latency = 1; }

// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
def : WriteRes<WriteVST, [ThunderXUnitLdSt]> { let Latency = 5;
                                               let ResourceCycles = [2]; }
def ThunderXWriteVST1 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 4; }
def ThunderXWriteVST2 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 5;
                                                            let ResourceCycles = [2]; }
def ThunderXWriteVST3 : SchedWriteRes<[ThunderXUnitLdSt]> { let Latency = 6;
                                                            let ResourceCycles = [3]; }

def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// Branch
def : WriteRes<WriteBr, [ThunderXUnitB]>;
def : WriteRes<WriteBrReg, [ThunderXUnitB]>;
def : WriteRes<WriteSys, [ThunderXUnitB]>;
def : WriteRes<WriteBarrier, [ThunderXUnitB]>;
def : WriteRes<WriteHint, [ThunderXUnitB]>;

// FP ALU
def : WriteRes<WriteF, [ThunderXUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCmp, [ThunderXUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [ThunderXUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [ThunderXUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [ThunderXUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteV, [ThunderXUnitFPALU]> { let Latency = 6; }

// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [ThunderXUnitFPMDS]> { let Latency = 6; }
def : WriteRes<WriteFDiv, [ThunderXUnitFPMDS]> { let Latency = 33;
                                                 let ResourceCycles = [29]; }
def ThunderXWriteFMAC : SchedWriteRes<[ThunderXUnitFPMDS]> { let Latency = 10; }
def ThunderXWriteFDivSP : SchedWriteRes<[ThunderXUnitFPMDS]> { let Latency = 18;
                                                               let ResourceCycles = [14]; }
def ThunderXWriteFDivDP : SchedWriteRes<[ThunderXUnitFPMDS]> { let Latency = 33;
                                                               let ResourceCycles = [29]; }
def ThunderXWriteFSqrtSP : SchedWriteRes<[ThunderXUnitFPMDS]> { let Latency = 17;
                                                                let ResourceCycles = [13]; }
def ThunderXWriteFSqrtDP : SchedWriteRes<[ThunderXUnitFPMDS]> { let Latency = 32;
                                                                let ResourceCycles = [28]; }

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.

// No forwarding for these reads.
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;

// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
//       operands are needed one cycle later if and only if they are to be
//       shifted. Otherwise, they too are needed two cycles later. This same
//       ReadAdvance applies to Extended registers as well, even though there is
//       a separate SchedPredicate for them.
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
                             WriteISReg, WriteIEReg,WriteIS,
                             WriteID32,WriteID64,
                             WriteIM32,WriteIM64]>;
def ThunderXReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
                                               WriteISReg, WriteIEReg,WriteIS,
                                               WriteID32,WriteID64,
                                               WriteIM32,WriteIM64]>;
def ThunderXReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
                                                  WriteISReg, WriteIEReg,WriteIS,
                                                  WriteID32,WriteID64,
                                                  WriteIM32,WriteIM64]>;
def ThunderXReadISReg : SchedReadVariant<[
  SchedVar<RegShiftedPred, [ThunderXReadShifted]>,
  SchedVar<NoSchedPred, [ThunderXReadNotShifted]>]>;
def : SchedAlias<ReadISReg, ThunderXReadISReg>;

def ThunderXReadIEReg : SchedReadVariant<[
  SchedVar<RegExtendedPred, [ThunderXReadShifted]>,
  SchedVar<NoSchedPred, [ThunderXReadNotShifted]>]>;
def : SchedAlias<ReadIEReg, ThunderXReadIEReg>;

// MAC - Operands are generally needed one cycle later in the MAC pipe.
//       Accumulator operands are needed two cycles later.
def : ReadAdvance<ReadIM, 1>;
def : ReadAdvance<ReadIMA, 2>;

// Div
def : ReadAdvance<ReadID, 1>;

//===----------------------------------------------------------------------===//
// Subtarget-specific InstRWs.
+ +//--- +// Miscellaneous +//--- +def : InstRW<[WriteI], (instrs COPY)>; + +//--- +// Vector Loads +//--- +def : InstRW<[ThunderXWriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[ThunderXWriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[ThunderXWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[ThunderXWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[ThunderXWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[ThunderXWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +def : InstRW<[ThunderXWriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[ThunderXWriteVLD3], (instregex "LD3Threev(2d)$")>; +def : InstRW<[ThunderXWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[ThunderXWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[ThunderXWriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[ThunderXWriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[ThunderXWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[ThunderXWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[ThunderXWriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVST2], (instregex 
"ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[ThunderXWriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[ThunderXWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[ThunderXWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[ThunderXWriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[ThunderXWriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[ThunderXWriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[ThunderXWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[ThunderXWriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[ThunderXWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[ThunderXWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[ThunderXWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[ThunderXWriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[ThunderXWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[ThunderXWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[ThunderXWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[ThunderXWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[ThunderXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[ThunderXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +} Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -44,7 +44,8 @@ Cyclone, ExynosM1, Kryo, - Vulcan + Vulcan, + ThunderX }; protected: Index: lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- lib/Target/AArch64/AArch64Subtarget.cpp +++ lib/Target/AArch64/AArch64Subtarget.cpp @@ -74,6 +74,15 @@ MinPrefetchStride = 1024; MaxPrefetchIterationsAhead = 11; break; + case ThunderX: + CacheLineSize = 128; + PrefetchDistance = 256; + PrefFunctionAlignment = 5; + PrefLoopAlignment = 5; + MinPrefetchStride = 8; + MaxPrefetchIterationsAhead = 7; + MergeNarrowLoads = true; + break; case Vulcan: MaxInterleaveFactor = 4; break; Index: test/CodeGen/AArch64/cpus.ll 
=================================================================== --- test/CodeGen/AArch64/cpus.ll +++ test/CodeGen/AArch64/cpus.ll @@ -11,6 +11,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m2 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=vulcan 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; CHECK-NOT: {{.*}} is not a recognized processor for this target Index: test/CodeGen/AArch64/remat.ll =================================================================== --- test/CodeGen/AArch64/remat.ll +++ test/CodeGen/AArch64/remat.ll @@ -7,6 +7,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m2 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s %X = type { i64, i64, i64 }
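Quick local check (not part of the patch): the new CPU name can be smoke-tested in the same style as the cpus.ll RUN lines above. Only the -mcpu=thunderx RUN line and the CHECK-NOT pattern come from the existing test; the IR function below is an arbitrary, hypothetical example.

; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx 2>&1 | FileCheck %s
; CHECK-NOT: {{.*}} is not a recognized processor for this target

; Trivial function, enough to confirm that -mcpu=thunderx is accepted and that
; codegen runs with the ThunderX subtarget, without depending on any
; particular instruction sequence.
define i32 @add(i32 %a, i32 %b) {
entry:
  %sum = add nsw i32 %a, %b
  ret i32 %sum
}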