Index: include/llvm/Target/Target.td
===================================================================
--- include/llvm/Target/Target.td
+++ include/llvm/Target/Target.td
@@ -1170,6 +1170,10 @@
   //
   string Value = v;
 
+  // For non-boolean values, keep the smallest value set by any enabled
+  // feature instead of the biggest one, which is the default.
+  bit UseMinValue = 0;
+
   // Desc - Feature description.  Used by command line (-mattr=) to display help
   // information.
   //
Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -58,6 +58,65 @@
                                         "Reserve X18, making it unavailable "
                                         "as a GPR">;
 
+def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
+                                            "MergeNarrowLoads", "true",
+                                            "Merge narrow load instructions">;
+
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
+                                    "Use alias analysis during codegen">;
+
+def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
+    "true",
+    "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
+
+def FeaturePredictableSelectIsExpensive : SubtargetFeature<
+    "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
+    "Prefer likely predicted branches over selects">;
+
+def FeatureInterleaveFactor4 : SubtargetFeature<"interleave-factor-4",
+    "MaxInterleaveFactor", "4",
+    "Set vectorizer maximum interleave factor to 4">;
+
+def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
+    "CustomAsCheapAsMove", "true",
+    "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
+
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
+    "UsePostRAScheduler", "true", "Schedule again after register allocation">;
+
+def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
+    "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
+
+def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
+    "AvoidQuadLdStPairs", "true",
+    "Do not form quad load/store pair operations">;
+
+def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
+    "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
+    "true", "Use alternative pattern for sextload convert to f32">;
+
+def FeatureMacroOpFusion : SubtargetFeature<
+    "macroop-fusion", "HasMacroOpFusion", "true",
+    "CPU supports macro op fusion">;
+
+def FeatureVectorInsertExtractCost2 : SubtargetFeature<
+    "vector-insert-extract-cost-2", "VectorInsertExtractBaseCost", "2",
+    "Most vector insert/extract operations have cost 2"> {
+  let UseMinValue = 1;
+}
+
+def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
+    "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
+    "Disable latency scheduling heuristic">;
+
+def FeatureUseRSqrt : SubtargetFeature<
+    "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
+
+def FeatureCycloneLikeSoftwarePrefetch : SubtargetFeature<
+    "software-prefetch-cyclone", "SoftwarePrefetchMode",
+    "SoftwarePrefetchCyclone",
+    "Use software prefetch settings for Cyclone-like CPUs">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
@@ -94,57 +153,91 @@
 include "AArch64SchedKryo.td"
 
 def ProcA35     : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
-                                   "Cortex-A35 ARM processors",
-                                   [FeatureFPARMv8,
-                                   FeatureNEON,
-                                   FeatureCrypto,
+                                   "Cortex-A35 ARM processors", [
                                    FeatureCRC,
-                                   FeaturePerfMon]>;
+                                   FeatureCrypto,
+                                   FeatureFPARMv8,
+                                   FeatureNEON,
+                                   FeaturePerfMon
+                                   ]>;
 
 def ProcA53     : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
-                                   "Cortex-A53 ARM processors",
-                                   [FeatureFPARMv8,
-                                   FeatureNEON,
-                                   FeatureCrypto,
+                                   "Cortex-A53 ARM processors", [
+                                   FeatureBalanceFPOps,
                                    FeatureCRC,
-                                   FeaturePerfMon]>;
+                                   FeatureCrypto,
+                                   FeatureCustomCheapAsMoveHandling,
+                                   FeatureFPARMv8,
+                                   FeatureNEON,
+                                   FeaturePerfMon,
+                                   FeaturePostRAScheduler,
+                                   FeatureUseAA
+                                   ]>;
 
 def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
-                                   "Cortex-A57 ARM processors",
-                                   [FeatureFPARMv8,
-                                   FeatureNEON,
-                                   FeatureCrypto,
+                                   "Cortex-A57 ARM processors", [
+                                   FeatureBalanceFPOps,
                                    FeatureCRC,
-                                   FeaturePerfMon]>;
+                                   FeatureCrypto,
+                                   FeatureCustomCheapAsMoveHandling,
+                                   FeatureFPARMv8,
+                                   FeatureInterleaveFactor4,
+                                   FeatureMergeNarrowLd,
+                                   FeatureNEON,
+                                   FeaturePerfMon,
+                                   FeaturePostRAScheduler,
+                                   FeaturePredictableSelectIsExpensive
+                                   ]>;
 
 def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
-                                   "Cyclone",
-                                   [FeatureFPARMv8,
-                                   FeatureNEON,
+                                   "Cyclone", [
+                                   FeatureAlternateSExtLoadCVTF32Pattern,
                                    FeatureCrypto,
+                                   FeatureCycloneLikeSoftwarePrefetch,
+                                   FeatureDisableLatencySchedHeuristic,
+                                   FeatureFPARMv8,
+                                   FeatureMacroOpFusion,
+                                   FeatureNEON,
                                    FeaturePerfMon,
-                                   FeatureZCRegMove, FeatureZCZeroing]>;
+                                   FeatureSlowMisaligned128Store,
+                                   FeatureZCRegMove,
+                                   FeatureZCZeroing
+                                   ]>;
 
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
-                                    "Samsung Exynos-M1 processors",
-                                    [FeatureFPARMv8,
-                                    FeatureNEON,
-                                    FeatureCrypto,
+                                    "Samsung Exynos-M1 processors", [
+                                    FeatureAvoidQuadLdStPairs,
                                     FeatureCRC,
-                                    FeaturePerfMon]>;
+                                    FeatureCrypto,
+                                    FeatureCustomCheapAsMoveHandling,
+                                    FeatureFPARMv8,
+                                    FeatureNEON,
+                                    FeaturePerfMon,
+                                    FeatureUseRSqrt
+                                    ]>;
 
 def ProcKryo    : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
-                                   "Qualcomm Kryo processors",
-                                   [FeatureFPARMv8,
-                                   FeatureNEON,
-                                   FeatureCrypto,
+                                   "Qualcomm Kryo processors", [
                                    FeatureCRC,
-                                   FeaturePerfMon]>;
-
-def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
-                                               FeatureNEON,
-                                               FeatureCRC,
-                                               FeaturePerfMon]>;
+                                   FeatureCrypto,
+                                   FeatureCustomCheapAsMoveHandling,
+                                   FeatureFPARMv8,
+                                   FeatureInterleaveFactor4,
+                                   FeatureMergeNarrowLd,
+                                   FeatureNEON,
+                                   FeaturePerfMon,
+                                   FeaturePostRAScheduler,
+                                   FeaturePredictableSelectIsExpensive,
+                                   FeatureVectorInsertExtractCost2
+                                   ]>;
+
+def : ProcessorModel<"generic", NoSchedModel, [
+                     FeatureCRC,
+                     FeatureFPARMv8,
+                     FeatureNEON,
+                     FeaturePerfMon,
+                     FeaturePostRAScheduler
+                     ]>;
 
 // FIXME: Cortex-A35 is currently modelled as a Cortex-A53
 def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
Index: lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
===================================================================
--- lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -314,9 +314,7 @@
   if (skipFunction(*F.getFunction()))
     return false;
 
-  // Don't do anything if this isn't an A53 or A57.
-  if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
-        F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+  if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
     return false;
 
   bool Changed = false;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -634,9 +634,7 @@
     }
   }
 
-  // Prefer likely predicted branches to selects on out-of-order cores.
-  if (Subtarget->isCortexA57() || Subtarget->isKryo())
-    PredictableSelectIsExpensive = true;
+  PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
@@ -819,7 +817,7 @@
     // FIXME: Define an attribute for slow unaligned accesses instead of
     //        relying on the CPU type as a proxy.
     // On Cyclone, unaligned 128-bit stores are slow.
-    *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
+    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
             // See comments in performSTORECombine() for more details about
            // these conditions.
 
@@ -8814,7 +8812,7 @@
 
   // Cyclone has bad performance on unaligned 16B stores when crossing line and
   // page boundaries. We want to split such stores.
-  if (!Subtarget->isCyclone())
+  if (!Subtarget->isMisaligned128StoreSlow())
    return SDValue();
 
   // Don't split at -Oz.
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -544,8 +544,7 @@
 // FIXME: this implementation should be micro-architecture dependent, so a
 // micro-architecture target hook should be introduced here in future.
 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
-  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
-      !Subtarget.isExynosM1() && !Subtarget.isKryo())
+  if (!Subtarget.hasCustomCheapAsMoveHandling())
     return MI->isAsCheapAsAMove();
 
   unsigned Imm;
@@ -559,7 +558,7 @@
   case AArch64::ADDXri:
   case AArch64::SUBWri:
   case AArch64::SUBXri:
-    return (Subtarget.isExynosM1() ||
+    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
             MI->getOperand(3).getImm() == 0);
 
   // add/sub on register with shift
@@ -568,7 +567,7 @@
   case AArch64::SUBWrs:
   case AArch64::SUBXrs:
     Imm = MI->getOperand(3).getImm();
-    return (Subtarget.isExynosM1() &&
+    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
             AArch64_AM::getArithShiftValue(Imm) < 4);
 
   // logical ops on immediate
@@ -609,7 +608,7 @@
   case AArch64::ORRWrs:
   case AArch64::ORRXrs:
     Imm = MI->getOperand(3).getImm();
-    return (Subtarget.isExynosM1() &&
+    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
             AArch64_AM::getShiftValue(Imm) < 4 &&
             AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
 
@@ -1523,7 +1522,7 @@
     return false;
 
   // Do not pair quad ld/st for Exynos.
-  if (Subtarget.isExynosM1()) {
+  if (Subtarget.avoidQuadLdStPairs()) {
     switch (MI->getOpcode()) {
     default:
       break;
@@ -1801,7 +1800,7 @@
 
 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                               MachineInstr *Second) const {
-  if (Subtarget.isCyclone()) {
+  if (Subtarget.hasMacroOpFusion()) {
     // Cyclone can fuse CMN, CMP, TST followed by Bcc.
     unsigned SecondOpcode = Second->getOpcode();
     if (SecondOpcode == AArch64::Bcc) {
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -34,7 +34,8 @@
 
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
-def IsCyclone        : Predicate<"Subtarget->isCyclone()">;
+def UseAlternateSExtLoadCVTF32
+    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
 
 //===----------------------------------------------------------------------===//
 // AArch64-specific DAG Nodes.
@@ -4957,7 +4958,8 @@
                                 0),
                             dsub)),
                         0),
-                    ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+                    ssub)))>,
+          Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
 
 def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                           (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -5010,7 +5012,8 @@
                                 0),
                             dsub)),
                         0),
-                    dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+                    dsub)))>,
+          Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
 
 def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                            (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -160,10 +160,6 @@
   // Find and promote load instructions which read directly from store.
   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
 
-  // Check if converting two narrow loads into a single wider load with
-  // bitfield extracts could be enabled.
-  bool enableNarrowLdMerge(MachineFunction &Fn);
-
   bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -1912,15 +1908,6 @@
   return Modified;
 }
 
-bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
-  bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();
-  // FIXME: The benefit from converting narrow loads into a wider load could be
-  // microarchitectural as it assumes that a single load with two bitfield
-  // extracts is cheaper than two narrow loads. Currently, this conversion is
-  // enabled only in cortex-a57 on which performance benefits were verified.
-  return ProfitableArch && !Subtarget->requiresStrictAlign();
-}
-
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   if (skipFunction(*Fn.getFunction()))
     return false;
@@ -1936,7 +1923,8 @@
   UsedRegs.resize(TRI->getNumRegs());
 
   bool Modified = false;
-  bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
+  bool enableNarrowLdOpt =
+      Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
   for (auto &MBB : Fn)
     Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
 
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -33,8 +33,8 @@
 class Triple;
 
 class AArch64Subtarget : public AArch64GenSubtargetInfo {
-protected:
-  enum ARMProcFamilyEnum {
+public:
+  enum ARMProcFamilyEnum : uint8_t {
     Others,
     CortexA35,
     CortexA53,
@@ -44,6 +44,12 @@
     Kryo
   };
 
+  enum SoftwarePrefetchModeEnum : uint8_t {
+    NoSoftwarePrefetch,
+    SoftwarePrefetchCyclone,
+  };
+
+protected:
   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
   ARMProcFamilyEnum ARMProcFamily = Others;
 
@@ -66,6 +72,25 @@
   // StrictAlign - Disallow unaligned memory accesses.
   bool StrictAlign = false;
 
+  bool MergeNarrowLoads = false;
+  bool UseAA = false;
+  bool PredictableSelectIsExpensive = false;
+  bool BalanceFPOps = false;
+  bool CustomAsCheapAsMove = false;
+  bool UsePostRAScheduler = false;
+  bool Misaligned128StoreIsSlow = false;
+  bool AvoidQuadLdStPairs = false;
+  bool UseAlternateSExtLoadCVTF32Pattern = false;
+  bool HasMacroOpFusion = false;
+  bool DisableLatencySchedHeuristic = false;
+  bool UseRSqrt = false;
+  uint8_t MaxInterleaveFactor = 2;
+  uint8_t VectorInsertExtractBaseCost = 3;
+  SoftwarePrefetchModeEnum SoftwarePrefetchMode = NoSoftwarePrefetch;
+  uint16_t CacheLineSize = 0;
+  uint16_t PrefetchDistance = 0;
+  uint16_t MinPrefetchStride = 1;
+  unsigned MaxPrefetchIterationsAhead = UINT_MAX;
   // ReserveX18 - X18 is not available as a general purpose register.
   bool ReserveX18;
 
@@ -123,7 +148,14 @@
   const Triple &getTargetTriple() const { return TargetTriple; }
   bool enableMachineScheduler() const override { return true; }
   bool enablePostRAScheduler() const override {
-    return isGeneric() || isCortexA53() || isCortexA57() || isKryo();
+    return UsePostRAScheduler;
+  }
+
+  /// Returns ARM processor family.
+  /// Please avoid this function! The preferred way to handle CPU specifics
+  /// is using a SubtargetFeature.
+  ARMProcFamilyEnum getProcFamily() const {
+    return ARMProcFamily;
   }
 
   bool hasV8_1aOps() const { return HasV8_1aOps; }
@@ -140,6 +172,30 @@
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
   bool hasCRC() const { return HasCRC; }
+  bool mergeNarrowLoads() const { return MergeNarrowLoads; }
+  bool balanceFPOps() const { return BalanceFPOps; }
+  bool predictableSelectIsExpensive() const {
+    return PredictableSelectIsExpensive;
+  }
+  bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
+  bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
+  bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
+  bool useAlternateSExtLoadCVTF32Pattern() const {
+    return UseAlternateSExtLoadCVTF32Pattern;
+  }
+  bool hasMacroOpFusion() const { return HasMacroOpFusion; }
+  bool useRSqrt() const { return UseRSqrt; }
+  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+  unsigned getVectorInsertExtractBaseCost() const {
+    return VectorInsertExtractBaseCost;
+  }
+  unsigned getCacheLineSize() const { return CacheLineSize; }
+  unsigned getPrefetchDistance() const { return PrefetchDistance; }
+  unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
+  unsigned getMaxPrefetchIterationsAhead() const {
+    return MaxPrefetchIterationsAhead;
+  }
+
   /// CPU has TBI (top byte of addresses is ignored during HW address
   /// translation) and OS enables it.
   bool supportsAddressTopByteIgnored() const;
@@ -160,14 +216,7 @@
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
 
-  bool isGeneric() const { return CPUString == "generic"; }
-  bool isCyclone() const { return ARMProcFamily == Cyclone; }
-  bool isCortexA57() const { return ARMProcFamily == CortexA57; }
-  bool isCortexA53() const { return ARMProcFamily == CortexA53; }
-  bool isExynosM1() const { return ARMProcFamily == ExynosM1; }
-  bool isKryo() const { return ARMProcFamily == Kryo; }
-
-  bool useAA() const override { return isCortexA53(); }
+  bool useAA() const override { return UseAA; }
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
Index: lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.cpp
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -44,6 +44,19 @@
     CPUString = "generic";
 
   ParseSubtargetFeatures(CPUString, FS);
+
+  switch (SoftwarePrefetchMode) {
+  case SoftwarePrefetchCyclone:
+    CacheLineSize = 64;
+    PrefetchDistance = 280;
+    MinPrefetchStride = 2048;
+    MaxPrefetchIterationsAhead = 3;
+    break;
+  case NoSoftwarePrefetch:
+    /* stay with defaults */
+    break;
+  }
+
   return *this;
 }
 
@@ -115,8 +128,7 @@
   // Enabling or Disabling the latency heuristic is a close call: It seems to
   // help nearly no benchmark on out-of-order architectures, on the other hand
   // it regresses register pressure on a few benchmarking.
-  if (isCyclone())
-    Policy.DisableLatencyHeuristic = true;
+  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
 }
 
 bool AArch64Subtarget::enableEarlyIfConversion() const {
@@ -138,8 +150,5 @@
 
 std::unique_ptr<PBQPRAConstraint>
 AArch64Subtarget::getCustomPBQPConstraints() const {
-  if (!isCortexA57())
-    return nullptr;
-
-  return llvm::make_unique<A57ChainingConstraint>();
+  return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
 }
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -147,8 +147,7 @@
   // (52 mantissa bits) are 2 and 3, respectively.
   unsigned ExtraStepsF = 2,
            ExtraStepsD = ExtraStepsF + 1;
-  // FIXME: Enable x^-1/2 only for Exynos M1 at the moment.
-  bool UseRsqrt = ST.isExynosM1();
+  bool UseRsqrt = ST.useRSqrt();
 
   TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
   TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -368,9 +368,7 @@
   }
 
   // All other insert/extracts cost this much.
-  if (ST->isKryo())
-    return 2;
-  return 3;
+  return ST->getVectorInsertExtractBaseCost();
 }
 
 int AArch64TTIImpl::getArithmeticInstrCost(
@@ -529,9 +527,7 @@
 }
 
 unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
-  if (ST->isCortexA57() || ST->isKryo())
-    return 4;
-  return 2;
+  return ST->getMaxInterleaveFactor();
 }
 
 void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
@@ -630,28 +626,17 @@
 }
 
 unsigned AArch64TTIImpl::getCacheLineSize() {
-  if (ST->isCyclone())
-    return 64;
-  return BaseT::getCacheLineSize();
+  return ST->getCacheLineSize();
 }
 
 unsigned AArch64TTIImpl::getPrefetchDistance() {
-  if (ST->isCyclone())
-    return 280;
-  return BaseT::getPrefetchDistance();
+  return ST->getPrefetchDistance();
 }
 
 unsigned AArch64TTIImpl::getMinPrefetchStride() {
-  if (ST->isCyclone())
-    // The HW prefetcher handles accesses with strides up to 2KB.
-    return 2048;
-  return BaseT::getMinPrefetchStride();
+  return ST->getMinPrefetchStride();
 }
 
 unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
-  if (ST->isCyclone())
-    // Be conservative for now and don't prefetch ahead too much since the loop
-    // may terminate early.
-    return 3;
-  return BaseT::getMaxPrefetchIterationsAhead();
+  return ST->getMaxPrefetchIterationsAhead();
 }
Index: utils/TableGen/SubtargetEmitter.cpp
===================================================================
--- utils/TableGen/SubtargetEmitter.cpp
+++ utils/TableGen/SubtargetEmitter.cpp
@@ -1366,6 +1366,7 @@
     const std::string &Instance = R->getName();
     const std::string &Value = R->getValueAsString("Value");
     const std::string &Attribute = R->getValueAsString("Attribute");
+    bool UseMinValue = R->getValueAsBit("UseMinValue");
 
     if (Value=="true" || Value=="false")
       OS << "  if (Bits[" << Target << "::"
@@ -1374,7 +1375,7 @@
     else
      OS << "  if (Bits[" << Target << "::"
         << Instance << "] && "
-        << Attribute << " < " << Value << ") "
+        << Attribute << (UseMinValue ? " > " : " < ") << Value << ") "
        << Attribute << " = " << Value << ";\n";
  }
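
Note (reviewer illustration, not part of the patch): with the SubtargetEmitter change above, the generated
ParseSubtargetFeatures() in AArch64GenSubtargetInfo.inc resolves numeric attributes by comparing against the
feature's value before assigning it, and UseMinValue only flips that comparison. For the two numeric features
defined in AArch64.td this should come out roughly as follows; the exact formatting of the generated file may
differ:

    // Default behaviour: keep the largest value requested by any enabled feature.
    if (Bits[AArch64::FeatureInterleaveFactor4] && MaxInterleaveFactor < 4)
      MaxInterleaveFactor = 4;
    // UseMinValue = 1 flips the comparison so the smallest value wins, letting
    // FeatureVectorInsertExtractCost2 lower the default base cost of 3 to 2.
    if (Bits[AArch64::FeatureVectorInsertExtractCost2] &&
        VectorInsertExtractBaseCost > 2)
      VectorInsertExtractBaseCost = 2;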
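
A side effect worth noting: because each tuning is now a subtarget feature rather than a hard-coded CPU check,
the individual behaviours can be toggled from the command line independently of -mcpu, for example
(illustrative invocation only, any .ll input works):

    llc -mtriple=aarch64-none-linux-gnu -mcpu=generic \
        -mattr=+merge-narrow-ld,+use-postra-scheduler,+use-aa test.ll -o -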