Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -8,23 +8,12 @@
 //===------------------------------------------------------------===//

 include "llvm/Target/Target.td"
+include "AMDGPUFeatures.td"

 //===------------------------------------------------------------===//
 // Subtarget Features (device properties)
 //===------------------------------------------------------------===//

-def FeatureFP64 : SubtargetFeature<"fp64",
-  "FP64",
-  "true",
-  "Enable double precision operations"
->;
-
-def FeatureFMA : SubtargetFeature<"fmaf",
-  "FMA",
-  "true",
-  "Enable single precision FMA (not as fast as mul+add, but fused)"
->;
-
 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
   "FastFMAF32",
   "true",
@@ -43,30 +32,6 @@
   "Most fp64 instructions are half rate instead of quarter"
 >;

-def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-  "R600ALUInst",
-  "false",
-  "Older version of ALU instructions encoding"
->;
-
-def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
-  "HasVertexCache",
-  "true",
-  "Specify use of dedicated vertex cache"
->;
-
-def FeatureCaymanISA : SubtargetFeature<"caymanISA",
-  "CaymanISA",
-  "true",
-  "Use Cayman ISA"
->;
-
-def FeatureCFALUBug : SubtargetFeature<"cfalubug",
-  "CFALUBug",
-  "true",
-  "GPU has CF_ALU bug"
->;
-
 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
   "FlatAddressSpace",
   "true",
@@ -152,27 +117,6 @@
   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
 >;

-class SubtargetFeatureFetchLimit <string Value> :
-  SubtargetFeature <"fetch"#Value,
-  "TexVTXClauseSize",
-  Value,
-  "Limit the maximum number of fetches in a clause to "#Value
->;
-
-def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
-def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
-
-class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
-  "wavefrontsize"#Value,
-  "WavefrontSize",
-  !cast<string>(Value),
-  "The number of threads per wavefront"
->;
-
-def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
-def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
-def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
-
 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
   "ldsbankcount"#Value,
   "LDSBankCount",
@@ -183,19 +127,6 @@
 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

-class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
-  "localmemorysize"#Value,
-  "LocalMemorySize",
-  !cast<string>(Value),
-  "The size of local memory in bytes"
->;
-
-def FeatureGCN : SubtargetFeature<"gcn",
-  "IsGCN",
-  "true",
-  "GCN or newer GPU"
->;
-
 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
   "GCN3Encoding",
   "true",
@@ -368,12 +299,6 @@
   [FeatureFP64FP16Denormals]
 >;

-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
-  "DX10Clamp",
-  "true",
-  "clamp modifier clamps NaNs to 0.0"
->;
-
 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
   "FPExceptions",
   "true",
@@ -416,12 +341,6 @@
   "Dump MachineInstrs in the CodeEmitter"
 >;

-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
-  "EnablePromoteAlloca",
-  "true",
-  "Enable promote alloca pass"
->;
-
 // XXX - This should probably be removed once enabled by default
 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
   "EnableLoadStoreOpt",
@@ -485,45 +404,29 @@
   "Dummy feature to disable assembler instructions"
 >;

-class SubtargetFeatureGeneration <string Value,
-                                  list<SubtargetFeature> Implies> :
SubtargetFeature ; - -def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; -def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; -def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; - -def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] ->; - -def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16, FeatureLocalMemorySize0] ->; - -def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16, FeatureLocalMemorySize32768] +def FeatureGCN : SubtargetFeature<"gcn", + "IsGCN", + "true", + "GCN or newer GPU" >; -def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64, - FeatureLocalMemorySize32768] ->; +class AMDGPUSubtargetFeatureGeneration Implies> : + SubtargetFeatureGeneration ; -def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", +def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureGCN, FeatureLDSBankCount32, FeatureMovrel] >; -def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", +def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel] >; -def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", +def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, @@ -534,7 +437,7 @@ ] >; -def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", +def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, @@ -744,8 +647,6 @@ // Predicate helper class //===----------------------------------------------------------------------===// -def TruePredicate : Predicate<"true">; - def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" @@ -837,36 +738,15 @@ def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; -// Exists to help track down where SubtargetPredicate isn't set rather -// than letting tablegen crash with an unhelpful error. 
-def InvalidPred : Predicate<"predicate not set on instruction or pattern">; - -class PredicateControl { - Predicate SubtargetPredicate = InvalidPred; - Predicate SIAssemblerPredicate = isSICI; - Predicate VIAssemblerPredicate = isVI; - list AssemblerPredicates = []; - Predicate AssemblerPredicate = TruePredicate; - list OtherPredicates = []; - list Predicates = !listconcat([SubtargetPredicate, - AssemblerPredicate], - AssemblerPredicates, - OtherPredicates); -} - -class AMDGPUPat : Pat, - PredicateControl; - - // Include AMDGPU TD files -include "R600Schedule.td" -include "R600Processors.td" include "SISchedule.td" include "GCNProcessors.td" include "AMDGPUInstrInfo.td" include "AMDGPUIntrinsics.td" +include "SIIntrinsics.td" include "AMDGPURegisterInfo.td" include "AMDGPURegisterBanks.td" include "AMDGPUInstructions.td" +include "SIInstrInfo.td" include "AMDGPUCallingConv.td" include "AMDGPUSearchableTables.td" Index: lib/Target/AMDGPU/AMDGPUCallingConv.td =================================================================== --- lib/Target/AMDGPU/AMDGPUCallingConv.td +++ lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -85,17 +85,6 @@ ]>> ]>; -// Calling convention for R600 -def CC_R600 : CallingConv<[ - CCIfInReg>> -]>; - // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ CCCustom<"allocateKernArg"> @@ -165,9 +154,5 @@ CCIf<"static_cast" "(State.getMachineFunction().getSubtarget()).getGeneration() >= " "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C", - CCDelegateTo>, - CCIf<"static_cast" - "(State.getMachineFunction().getSubtarget()).getGeneration() < " - "AMDGPUSubtarget::SOUTHERN_ISLANDS", - CCDelegateTo> + CCDelegateTo> ]>; Index: lib/Target/AMDGPU/AMDGPUFeatures.td =================================================================== --- /dev/null +++ lib/Target/AMDGPU/AMDGPUFeatures.td @@ -0,0 +1,52 @@ + +def FeatureFP64 : SubtargetFeature<"fp64", + "FP64", + "true", + "Enable double precision operations" +>; + +def FeatureFMA : SubtargetFeature<"fmaf", + "FMA", + "true", + "Enable single precision FMA (not as fast as mul+add, but fused)" +>; + +class SubtargetFeatureLocalMemorySize : SubtargetFeature< + "localmemorysize"#Value, + "LocalMemorySize", + !cast(Value), + "The size of local memory in bytes" +>; + +def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; +def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; +def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; + +class SubtargetFeatureWavefrontSize : SubtargetFeature< + "wavefrontsize"#Value, + "WavefrontSize", + !cast(Value), + "The number of threads per wavefront" +>; + +def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; +def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; +def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; + +class SubtargetFeatureGeneration Implies> : + SubtargetFeature ; + +def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp", + "DX10Clamp", + "true", + "clamp modifier clamps NaNs to 0.0" +>; + +def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", + "EnablePromoteAlloca", + "true", + "Enable promote alloca pass" +>; + Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -104,15 +104,11 @@ bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; - bool isConstantLoad(const 
MemSDNode *N, int cbID) const; bool isUniformBr(const SDNode *N) const; SDNode *glueCopyToM0(SDNode *N) const; const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; - bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); - bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, - SDValue& Offset); virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, @@ -227,9 +223,18 @@ }; class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { + const R600Subtarget *Subtarget; + AMDGPUAS AMDGPUASI; + + bool isConstantLoad(const MemSDNode *N, int cbID) const; + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); + bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, + SDValue& Offset); public: explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : - AMDGPUDAGToDAGISel(TM, OptLevel) {} + AMDGPUDAGToDAGISel(TM, OptLevel) { + AMDGPUASI = AMDGPU::getAMDGPUAS(*TM); + } void Select(SDNode *N) override; @@ -237,6 +242,11 @@ SDValue &Offset) override; bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) override; + + bool runOnMachineFunction(MachineFunction &MF) override; +protected: + // Include the pieces autogenerated from the target description. +#include "R600GenDAGISel.inc" }; } // end anonymous namespace @@ -280,8 +290,7 @@ } bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { - const SIInstrInfo *TII - = static_cast(Subtarget)->getInstrInfo(); + const SIInstrInfo *TII = Subtarget->getInstrInfo(); if (const ConstantSDNode *C = dyn_cast(N)) return TII->isInlineConstant(C->getAPIntValue()); @@ -636,16 +645,6 @@ SelectCode(N); } -bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { - if (!N->readMem()) - return false; - if (CbId == -1) - return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; - - return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; -} - bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); const Instruction *Term = BB->getTerminator(); @@ -661,26 +660,6 @@ // Complex Patterns //===----------------------------------------------------------------------===// -bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, - SDValue& IntPtr) { - if (ConstantSDNode *Cst = dyn_cast(Addr)) { - IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), - true); - return true; - } - return false; -} - -bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, - SDValue& BaseReg, SDValue &Offset) { - if (!isa(Addr)) { - BaseReg = Addr; - Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); - return true; - } - return false; -} - bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) { return false; @@ -692,11 +671,11 @@ SDLoc DL(Addr); if ((C = dyn_cast(Addr))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && (C = dyn_cast(Addr.getOperand(0)))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = 
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && (C = dyn_cast(Addr.getOperand(1)))) { @@ -2159,6 +2138,41 @@ } while (IsModified); } +bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); +} + +bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { + if (!N->readMem()) + return false; + if (CbId == -1) + return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || + N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; + + return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; +} + +bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, + SDValue& IntPtr) { + if (ConstantSDNode *Cst = dyn_cast(Addr)) { + IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), + true); + return true; + } + return false; +} + +bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, + SDValue& BaseReg, SDValue &Offset) { + if (!isa(Addr)) { + BaseReg = Addr; + Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); + return true; + } + return false; +} + void R600DAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -2179,12 +2193,12 @@ // pass. We want to avoid 128 bits copies as much as possible because they // can't be bundled by our scheduler. switch(NumVectorElts) { - case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 2: RegClassID = R600::R600_Reg64RegClassID; break; case 4: if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) - RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + RegClassID = R600::R600_Reg128VerticalRegClassID; else - RegClassID = AMDGPU::R600_Reg128RegClassID; + RegClassID = R600::R600_Reg128RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } @@ -2202,11 +2216,11 @@ SDLoc DL(Addr); if ((C = dyn_cast(Addr))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && (C = dyn_cast(Addr.getOperand(0)))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && (C = dyn_cast(Addr.getOperand(1)))) { @@ -2237,7 +2251,7 @@ && isInt<16>(IMMOffset->getZExtValue())) { Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(CurDAG->getEntryNode()), - AMDGPU::ZERO, MVT::i32); + R600::ZERO, MVT::i32); Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), MVT::i32); return true; Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -23,11 +23,13 @@ namespace llvm { class AMDGPUMachineFunction; -class AMDGPUSubtarget; +class AMDGPUCommonSubtarget; struct ArgDescriptor; class AMDGPUTargetLowering : public TargetLowering { private: + const AMDGPUCommonSubtarget *Subtarget; + /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been /// legalized from a smaller type VT. 
Need to match pre-legalized type because /// the generic legalization inserts the add/sub between the select and @@ -39,7 +41,6 @@ static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); protected: - const AMDGPUSubtarget *Subtarget; AMDGPUAS AMDGPUASI; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -123,7 +124,7 @@ void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl &Ins) const; public: - AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); + AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI); bool mayIgnoreSignedZero(SDValue Op) const { if (getTargetMachine().Options.NoSignedZerosFPMath) Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -151,7 +151,7 @@ } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, - const AMDGPUSubtarget &STI) + const AMDGPUCommonSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { AMDGPUASI = AMDGPU::getAMDGPUAS(TM); // Lower floating point store/load to integer store/load to reduce the number @@ -326,10 +326,6 @@ setOperationAction(ISD::FLOG, MVT::f32, Custom); setOperationAction(ISD::FLOG10, MVT::f32, Custom); - if (Subtarget->has16BitInsts()) { - setOperationAction(ISD::FLOG, MVT::f16, Custom); - setOperationAction(ISD::FLOG10, MVT::f16, Custom); - } setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); @@ -337,10 +333,6 @@ setOperationAction(ISD::FREM, MVT::f32, Custom); setOperationAction(ISD::FREM, MVT::f64, Custom); - // v_mad_f32 does not support denormals according to some sources. - if (!Subtarget->hasFP32Denormals()) - setOperationAction(ISD::FMAD, MVT::f32, Legal); - // Expand to fneg + fadd. setOperationAction(ISD::FSUB, MVT::f64, Expand); @@ -355,19 +347,6 @@ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); - if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { - setOperationAction(ISD::FCEIL, MVT::f64, Custom); - setOperationAction(ISD::FTRUNC, MVT::f64, Custom); - setOperationAction(ISD::FRINT, MVT::f64, Custom); - setOperationAction(ISD::FFLOOR, MVT::f64, Custom); - } - - if (!Subtarget->hasBFI()) { - // fcopysign can be done in a single instruction with BFI. - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - } - setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); @@ -399,12 +378,6 @@ setOperationAction(ISD::SUBE, VT, Legal); } - if (!Subtarget->hasBCNT(32)) - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - - if (!Subtarget->hasBCNT(64)) - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - // The hardware supports 32-bit ROTR, but not ROTL. 
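The comment just above notes that the hardware has a 32-bit ROTR but no ROTL, which is why ROTL is marked Expand while ROTR stays legal. A minimal standalone sketch (not part of the patch) of the identity that expansion relies on:

// Standalone illustration: rotate-left can always be rewritten in terms of
// rotate-right on a 32-bit value, so only ROTR needs native support.
#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t X, unsigned N) {
  N &= 31;
  return N ? (X >> N) | (X << (32 - N)) : X;
}

// rotl(x, n) == rotr(x, 32 - n) for n in [1, 31]; n == 0 is the identity.
static uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;
  return N ? rotr32(X, 32 - N) : X;
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 8) == rotr32(0x12345678u, 24));
  return 0;
}
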
setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -424,28 +397,11 @@ setOperationAction(ISD::SMAX, MVT::i32, Legal); setOperationAction(ISD::UMAX, MVT::i32, Legal); - if (Subtarget->hasFFBH()) - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); - - if (Subtarget->hasFFBL()) - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); - setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); - // We only really have 32-bit BFE instructions (and 16-bit on VI). - // - // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any - // effort to match them now. We want this to be false for i64 cases when the - // extraction isn't restricted to the upper or lower half. Ideally we would - // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that - // span the midpoint are probably relatively rare, so don't worry about them - // for now. - if (Subtarget->hasBFE()) - setHasExtractBitsInsn(true); - static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v4i32 }; @@ -550,11 +506,6 @@ // vector compares until that is fixed. setHasMultipleConditionRegisters(true); - // SI at least has hardware support for floating point exceptions, but no way - // of using or handling them is implemented. They are also optional in OpenCL - // (Section 7.3) - setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); - PredictableSelectIsExpensive = false; // We want to find all load dependencies for long chains of stores to enable @@ -776,7 +727,7 @@ { const LoadSDNode * L = dyn_cast(N); if (L->getMemOperand()->getAddrSpace() - == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT) + == AMDGPUASI.CONSTANT_ADDRESS_32BIT) return true; return false; } @@ -4302,9 +4253,11 @@ switch (IID) { case Intrinsic::amdgcn_mbcnt_lo: case Intrinsic::amdgcn_mbcnt_hi: { + const SISubtarget &ST = + DAG.getMachineFunction().getSubtarget(); // These return at most the wavefront size - 1. unsigned Size = Op.getValueType().getSizeInBits(); - Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2()); + Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); break; } default: Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -20,10 +20,6 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#define GET_INSTRINFO_HEADER -#include "AMDGPUGenInstrInfo.inc" -#undef GET_INSTRINFO_HEADER - namespace llvm { class AMDGPUSubtarget; @@ -31,26 +27,10 @@ class MachineInstr; class MachineInstrBuilder; -class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { -private: - const AMDGPUSubtarget &ST; - - virtual void anchor(); -protected: - AMDGPUAS AMDGPUASI; - +class AMDGPUInstrInfo { public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); - bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, - int64_t Offset1, int64_t Offset2, - unsigned NumLoads) const override; - - /// Return a target-specific opcode if Opcode is a pseudo instruction. - /// Return -1 if the target-specific opcode for the pseudo instruction does - /// not exist. If Opcode is not a pseudo instruction, this is identity. 
- int pseudoToMCOpcode(int Opcode) const; - static bool isUniformMMO(const MachineMemOperand *MMO); }; Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -23,109 +23,7 @@ using namespace llvm; -#define GET_INSTRINFO_CTOR_DTOR -#include "AMDGPUGenInstrInfo.inc" - -namespace llvm { -namespace AMDGPU { -#define GET_RSRCINTRINSIC_IMPL -#include "AMDGPUGenSearchableTables.inc" - -#define GET_D16IMAGEDIMINTRINSIC_IMPL -#include "AMDGPUGenSearchableTables.inc" -} -} - -// Pin the vtable to this file. -void AMDGPUInstrInfo::anchor() {} - -AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) - : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), - ST(ST), - AMDGPUASI(ST.getAMDGPUAS()) {} - -// FIXME: This behaves strangely. If, for example, you have 32 load + stores, -// the first 16 loads will be interleaved with the stores, and the next 16 will -// be clustered as expected. It should really split into 2 16 store batches. -// -// Loads are clustered until this returns false, rather than trying to schedule -// groups of stores. This also means we have to deal with saying different -// address space loads should be clustered, and ones which might cause bank -// conflicts. -// -// This might be deprecated so it might not be worth that much effort to fix. -bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, - int64_t Offset0, int64_t Offset1, - unsigned NumLoads) const { - assert(Offset1 > Offset0 && - "Second offset should be larger than first offset!"); - // If we have less than 16 loads in a row, and the offsets are within 64 - // bytes, then schedule together. - - // A cacheline is 64 bytes (for global memory). - return (NumLoads <= 16 && (Offset1 - Offset0) < 64); -} - -// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td -enum SIEncodingFamily { - SI = 0, - VI = 1, - SDWA = 2, - SDWA9 = 3, - GFX80 = 4, - GFX9 = 5 -}; - -static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { - switch (ST.getGeneration()) { - case AMDGPUSubtarget::SOUTHERN_ISLANDS: - case AMDGPUSubtarget::SEA_ISLANDS: - return SIEncodingFamily::SI; - case AMDGPUSubtarget::VOLCANIC_ISLANDS: - case AMDGPUSubtarget::GFX9: - return SIEncodingFamily::VI; - - // FIXME: This should never be called for r600 GPUs. - case AMDGPUSubtarget::R600: - case AMDGPUSubtarget::R700: - case AMDGPUSubtarget::EVERGREEN: - case AMDGPUSubtarget::NORTHERN_ISLANDS: - return SIEncodingFamily::SI; - } - - llvm_unreachable("Unknown subtarget generation!"); -} - -int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - SIEncodingFamily Gen = subtargetEncodingFamily(ST); - - if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && - ST.getGeneration() >= AMDGPUSubtarget::GFX9) - Gen = SIEncodingFamily::GFX9; - - if (get(Opcode).TSFlags & SIInstrFlags::SDWA) - Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 - : SIEncodingFamily::SDWA; - // Adjust the encoding family to GFX80 for D16 buffer instructions when the - // subtarget has UnpackedD16VMem feature. - // TODO: remove this when we discard GFX80 encoding. - if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16) - && !(get(Opcode).TSFlags & SIInstrFlags::MIMG)) - Gen = SIEncodingFamily::GFX80; - - int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); - - // -1 means that Opcode is already a native instruction. 
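The pseudoToMCOpcode logic removed above leans on two sentinels coming back from the TableGen-generated getMCOpcode lookup, as its comments describe. A simplified standalone model of just that sentinel handling; the opcodes, table, and enum here are made up for illustration and are not the real AMDGPU tables:

// Standalone model of the sentinel semantics documented in the patch:
// getMCOpcode() == -1 means "already a native instruction", while
// (uint16_t)-1 means "this pseudo has no encoding for the selected family".
#include <cassert>
#include <cstdint>

enum class EncodingFamily { SI = 0, VI = 1 };

// Hypothetical lookup; the real table is generated by TableGen.
static int getMCOpcodeModel(int Opcode, EncodingFamily Gen) {
  if (Opcode == 100)                          // a native instruction
    return -1;
  if (Opcode == 200)                          // pseudo encoded on VI only
    return Gen == EncodingFamily::VI ? 2000 : (uint16_t)-1;
  return (uint16_t)-1;
}

static int pseudoToMCOpcodeModel(int Opcode, EncodingFamily Gen) {
  int MCOp = getMCOpcodeModel(Opcode, Gen);
  if (MCOp == -1)
    return Opcode;            // already native: keep the opcode
  if (MCOp == (uint16_t)-1)
    return -1;                // no encoding in this generation
  return MCOp;
}

int main() {
  assert(pseudoToMCOpcodeModel(100, EncodingFamily::SI) == 100);
  assert(pseudoToMCOpcodeModel(200, EncodingFamily::VI) == 2000);
  assert(pseudoToMCOpcodeModel(200, EncodingFamily::SI) == -1);
  return 0;
}
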
- if (MCOp == -1) - return Opcode; - - // (uint16_t)-1 means that Opcode is a pseudo instruction that has - // no encoding in the given subtarget generation. - if (MCOp == (uint16_t)-1) - return -1; - - return MCOp; -} +AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { } // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence. bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) { Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -42,6 +42,47 @@ field bits<32> Inst = 0xffffffff; } +//===---------------------------------------------------------------------===// +// Return instruction +//===---------------------------------------------------------------------===// + +class ILFormat pattern> +: Instruction { + + let Namespace = "AMDGPU"; + dag OutOperandList = outs; + dag InOperandList = ins; + let Pattern = pattern; + let AsmString = !strconcat(asmstr, "\n"); + let isPseudo = 1; + let Itinerary = NullALU; + bit hasIEEEFlag = 0; + bit hasZeroOpFlag = 0; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let isCodeGenOnly = 1; +} + +def TruePredicate : Predicate<"true">; + +// Exists to help track down where SubtargetPredicate isn't set rather +// than letting tablegen crash with an unhelpful error. +def InvalidPred : Predicate<"predicate not set on instruction or pattern">; + +class PredicateControl { + Predicate SubtargetPredicate = InvalidPred; + list AssemblerPredicates = []; + Predicate AssemblerPredicate = TruePredicate; + list OtherPredicates = []; + list Predicates = !listconcat([SubtargetPredicate, + AssemblerPredicate], + AssemblerPredicates, + OtherPredicates); +} +class AMDGPUPat : Pat, + PredicateControl; + def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">; def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">; def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">; @@ -94,12 +135,6 @@ // Misc. 
PatFrags //===----------------------------------------------------------------------===// -class HasOneUseUnaryOp : PatFrag< - (ops node:$src0), - (op $src0), - [{ return N->hasOneUse(); }] ->; - class HasOneUseBinOp : PatFrag< (ops node:$src0, node:$src1), (op $src0, $src1), @@ -112,8 +147,6 @@ [{ return N->hasOneUse(); }] >; -def trunc_oneuse : HasOneUseUnaryOp; - let Properties = [SDNPCommutative, SDNPAssociative] in { def smax_oneuse : HasOneUseBinOp; def smin_oneuse : HasOneUseBinOp; @@ -239,6 +272,37 @@ [{(void)N; return false;}] >; +//===----------------------------------------------------------------------===// +// PatLeafs for Texture Constants +//===----------------------------------------------------------------------===// + +def TEX_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 9 || TType == 10 || TType == 16; + }] +>; + +def TEX_RECT : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 5; + }] +>; + +def TEX_SHADOW : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return (TType >= 6 && TType <= 8) || TType == 13; + }] +>; + +def TEX_SHADOW_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 11 || TType == 12 || TType == 17; + }] +>; //===----------------------------------------------------------------------===// // Load/Store Pattern Fragments @@ -746,11 +810,3 @@ (AMDGPUrcp (fsqrt vt:$src)), (RsqInst $src) >; - -include "R600Instructions.td" -include "R700Instructions.td" -include "EvergreenInstructions.td" -include "CaymanInstructions.td" - -include "SIInstrInfo.td" - Index: lib/Target/AMDGPU/AMDGPUIntrinsics.td =================================================================== --- lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -14,5 +14,3 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; } - -include "SIIntrinsics.td" Index: lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -117,7 +117,6 @@ return false; const TargetMachine &TM = TPC->getTM(); - const AMDGPUSubtarget &ST = TM.getSubtarget(F); bool Changed = false; for (auto *U : F.users()) { @@ -125,7 +124,7 @@ if (!CI) continue; - Changed |= ST.makeLIDRangeMetadata(CI); + Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI); } return Changed; } Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -152,7 +152,7 @@ IsAMDGCN = TT.getArch() == Triple::amdgcn; IsAMDHSA = TT.getOS() == Triple::AMDHSA; - const AMDGPUSubtarget &ST = TM->getSubtarget(F); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F); if (!ST.isPromoteAllocaEnabled()) return false; @@ -174,8 +174,8 @@ std::pair AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { - const AMDGPUSubtarget &ST = TM->getSubtarget( - *Builder.GetInsertBlock()->getParent()); + const Function &F = *Builder.GetInsertBlock()->getParent(); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F); if (!IsAMDHSA) { Function *LocalSizeYFn @@ -261,8 +261,8 @@ } Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) { - const AMDGPUSubtarget &ST = 
TM->getSubtarget( - *Builder.GetInsertBlock()->getParent()); + const AMDGPUCommonSubtarget &ST = + AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent()); Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic; switch (N) { @@ -602,7 +602,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) { FunctionType *FTy = F.getFunctionType(); - const AMDGPUSubtarget &ST = TM->getSubtarget(F); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F); // If the function has any arguments in the local address space, then it's // possible these arguments require the entire local memory space, so @@ -729,8 +729,7 @@ if (!SufficientLDS) return false; - const AMDGPUSubtarget &ST = - TM->getSubtarget(ContainingFunction); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction); unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; const DataLayout &DL = Mod->getDataLayout(); Index: lib/Target/AMDGPU/AMDGPURegisterInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -19,5 +19,4 @@ } -include "R600RegisterInfo.td" include "SIRegisterInfo.td" Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -39,22 +39,181 @@ #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_HEADER +#include "R600GenSubtargetInfo.inc" namespace llvm { class StringRef; -class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { +class AMDGPUCommonSubtarget { +private: + Triple TargetTriple; + +protected: + const FeatureBitset &SubtargetFeatureBits; + bool Has16BitInsts; + bool HasMadMixInsts; + bool FP32Denormals; + bool FPExceptions; + bool HasSDWA; + bool HasVOP3PInsts; + bool HasMulI24; + bool HasMulU24; + bool HasFminFmaxLegacy; + bool EnablePromoteAlloca; + int LocalMemorySize; + unsigned WavefrontSize; + +public: + AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); + + static const AMDGPUCommonSubtarget &get(const MachineFunction &MF); + static const AMDGPUCommonSubtarget &get(const TargetMachine &TM, + const Function &F); + + /// \returns Default range flat work group size for a calling convention. + std::pair getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; + + /// \returns Subtarget's default pair of minimum/maximum flat work group sizes + /// for function \p F, or minimum/maximum flat work group sizes explicitly + /// requested using "amdgpu-flat-work-group-size" attribute attached to + /// function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, or violate subtarget's specifications. + std::pair getFlatWorkGroupSizes(const Function &F) const; + + /// \returns Subtarget's default pair of minimum/maximum number of waves per + /// execution unit for function \p F, or minimum/maximum number of waves per + /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute + /// attached to function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, violate subtarget's specifications, or are not + /// compatible with minimum/maximum number of waves limited by flat work group + /// size, register usage, and/or lds usage. 
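getFlatWorkGroupSizes, documented above, honours an explicitly requested "amdgpu-flat-work-group-size" range only when it parses and stays within what the subtarget supports, and otherwise falls back to the defaults. A standalone sketch of that fallback behaviour; the "min,max" string format, the helper name, and the concrete limits are assumptions for illustration:

// Standalone sketch of the documented fallback: an explicit request is used
// only if it converts to integers and respects the subtarget's limits.
#include <cstdio>
#include <string>
#include <utility>

using Range = std::pair<unsigned, unsigned>;

static Range getFlatWorkGroupSizesModel(const std::string &Attr,
                                        Range Default, Range HwLimit) {
  unsigned Min = 0, Max = 0;
  if (std::sscanf(Attr.c_str(), "%u,%u", &Min, &Max) != 2)
    return Default;                         // not convertible to integers
  if (Min > Max || Min < HwLimit.first || Max > HwLimit.second)
    return Default;                         // violates subtarget limits
  return {Min, Max};
}

int main() {
  const Range Default{1, 256}, Hw{1, 1024};
  // A valid request is used as-is; a malformed one falls back to the default.
  Range A = getFlatWorkGroupSizesModel("64,128", Default, Hw);
  Range B = getFlatWorkGroupSizesModel("not-a-number", Default, Hw);
  std::printf("A=[%u,%u] B=[%u,%u]\n", A.first, A.second, B.first, B.second);
  return 0;
}
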
+ std::pair getWavesPerEU(const Function &F) const; + + /// Return the amount of LDS that can be used that will not restrict the + /// occupancy lower than WaveCount. + unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, + const Function &) const; + + /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if + /// the given LDS memory size is the only constraint. + unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; + + unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; + + bool isAmdHsaOS() const { + return TargetTriple.getOS() == Triple::AMDHSA; + } + + bool isAmdPalOS() const { + return TargetTriple.getOS() == Triple::AMDPAL; + } + + bool has16BitInsts() const { + return Has16BitInsts; + } + + bool hasMadMixInsts() const { + return HasMadMixInsts; + } + + bool hasFP32Denormals() const { + return FP32Denormals; + } + + bool hasFPExceptions() const { + return FPExceptions; + } + + bool hasSDWA() const { + return HasSDWA; + } + + bool hasVOP3PInsts() const { + return HasVOP3PInsts; + } + + bool hasMulI24() const { + return HasMulI24; + } + + bool hasMulU24() const { + return HasMulU24; + } + + bool hasFminFmaxLegacy() const { + return HasFminFmaxLegacy; + } + + bool isPromoteAllocaEnabled() const { + return EnablePromoteAlloca; + } + + unsigned getWavefrontSize() const { + return WavefrontSize; + } + + int getLocalMemorySize() const { + return LocalMemorySize; + } + + unsigned getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? 8 : 4; + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, + FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { + return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, + FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const { + return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); + } + + unsigned getMaxWavesPerEU() const { return 10; } + + /// Creates value range metadata on an workitemid.* inrinsic call or load. + bool makeLIDRangeMetadata(Instruction *I) const; + + virtual ~AMDGPUCommonSubtarget() {} +}; + +class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo, + public AMDGPUCommonSubtarget { public: enum Generation { - R600 = 0, - R700, - EVERGREEN, - NORTHERN_ISLANDS, - SOUTHERN_ISLANDS, - SEA_ISLANDS, - VOLCANIC_ISLANDS, - GFX9, + // Gap for R600 generations, so we can do comparisons between + // AMDGPUSubtarget and r600Subtarget. + SOUTHERN_ISLANDS = 4, + SEA_ISLANDS = 5, + VOLCANIC_ISLANDS = 6, + GFX9 = 7, }; enum { @@ -96,13 +255,20 @@ LLVMTrapHandlerRegValue = 1 }; +private: + SIFrameLowering FrameLowering; + + /// GlobalISel related APIs. 
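A standalone sketch of why the GCN Generation enum above now starts at SOUTHERN_ISLANDS = 4: the R600 generations keep the values 0..3 in their own enum (defined later in this header), so ordered comparisons written against a shared numeric scale keep working after the two subtarget classes are split. The helper below is illustrative only:

// Standalone model: two enums share one numeric scale so that
// "getGeneration() >= EVERGREEN"-style checks still order correctly.
#include <cassert>

namespace r600 {
enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
}
namespace gcn {
enum Generation { SOUTHERN_ISLANDS = 4, SEA_ISLANDS = 5,
                  VOLCANIC_ISLANDS = 6, GFX9 = 7 };
}

// Any GCN generation compares as newer than any R600 generation.
static bool isAtLeastEvergreen(int Gen) { return Gen >= r600::EVERGREEN; }

int main() {
  assert(isAtLeastEvergreen(gcn::SOUTHERN_ISLANDS)); // 4 >= 2
  assert(!isAtLeastEvergreen(r600::R700));           // 1 <  2
  return 0;
}
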
+ std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + protected: // Basic subtarget description. Triple TargetTriple; - Generation Gen; + unsigned Gen; unsigned IsaVersion; - unsigned WavefrontSize; - int LocalMemorySize; int LDSBankCount; unsigned MaxPrivateElementSize; @@ -111,9 +277,7 @@ bool HalfRate64Ops; // Dynamially set bits that enable features. - bool FP32Denormals; bool FP64FP16Denormals; - bool FPExceptions; bool DX10Clamp; bool FlatForGlobal; bool AutoWaitcntBeforeBarrier; @@ -130,7 +294,6 @@ // Used as options. bool EnableHugePrivateBuffer; bool EnableVGPRSpilling; - bool EnablePromoteAlloca; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; bool EnableSIScheduler; @@ -147,17 +310,13 @@ bool GFX9Insts; bool SGPRInitBug; bool HasSMemRealTime; - bool Has16BitInsts; bool HasIntClamp; - bool HasVOP3PInsts; - bool HasMadMixInsts; bool HasFmaMixInsts; bool HasMovrel; bool HasVGPRIndexMode; bool HasScalarStores; bool HasScalarAtomics; bool HasInv2PiInlineImm; - bool HasSDWA; bool HasSDWAOmod; bool HasSDWAScalar; bool HasSDWASdst; @@ -182,7 +341,6 @@ // Dummy feature to use for assembler in tablegen. bool FeatureDisable; - InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; AMDGPUAS AS; @@ -194,13 +352,30 @@ AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); - const AMDGPUInstrInfo *getInstrInfo() const override = 0; - const AMDGPUFrameLowering *getFrameLowering() const override = 0; - const AMDGPUTargetLowering *getTargetLowering() const override = 0; - const AMDGPURegisterInfo *getRegisterInfo() const override = 0; + virtual const SIInstrInfo *getInstrInfo() const override = 0; - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; + const SIFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + virtual const SITargetLowering *getTargetLowering() const override = 0; + + virtual const SIRegisterInfo *getRegisterInfo() const override = 0; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); } // Nothing implemented, just prevent crashes on use. 
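The GlobalISel objects above are owned by the subtarget through std::unique_ptr, and the overridden getters hand out non-owning pointers (null until the concrete subtarget constructs them). A minimal standalone sketch of that ownership pattern; the class names below are placeholders, not the real AMDGPU types:

// Standalone sketch: unique_ptr ownership with raw-pointer accessors.
#include <cassert>
#include <memory>

struct CallLoweringModel { /* stand-in for the real CallLowering object */ };

class SubtargetModel {
  std::unique_ptr<CallLoweringModel> CallLoweringInfo;

public:
  void initGlobalISel() {
    CallLoweringInfo = std::make_unique<CallLoweringModel>();
  }
  const CallLoweringModel *getCallLowering() const {
    return CallLoweringInfo.get();   // non-owning view, may be null
  }
};

int main() {
  SubtargetModel ST;
  assert(ST.getCallLowering() == nullptr);
  ST.initGlobalISel();
  assert(ST.getCallLowering() != nullptr);
  return 0;
}
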
@@ -210,34 +385,18 @@ void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool isAmdHsaOS() const { - return TargetTriple.getOS() == Triple::AMDHSA; - } - bool isMesa3DOS() const { return TargetTriple.getOS() == Triple::Mesa3D; } - bool isAmdPalOS() const { - return TargetTriple.getOS() == Triple::AMDPAL; - } - Generation getGeneration() const { - return Gen; - } - - unsigned getWavefrontSize() const { - return WavefrontSize; + return (Generation)Gen; } unsigned getWavefrontSizeLog2() const { return Log2_32(WavefrontSize); } - int getLocalMemorySize() const { - return LocalMemorySize; - } - int getLDSBankCount() const { return LDSBankCount; } @@ -250,18 +409,10 @@ return AS; } - bool has16BitInsts() const { - return Has16BitInsts; - } - bool hasIntClamp() const { return HasIntClamp; } - bool hasVOP3PInsts() const { - return HasVOP3PInsts; - } - bool hasFP64() const { return FP64; } @@ -270,6 +421,10 @@ return MIMG_R128; } + bool hasHWFP64() const { + return FP64; + } + bool hasFastFMAF32() const { return FastFMAF32; } @@ -279,15 +434,15 @@ } bool hasAddr64() const { - return (getGeneration() < VOLCANIC_ISLANDS); + return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); } bool hasBFE() const { - return (getGeneration() >= EVERGREEN); + return true; } bool hasBFI() const { - return (getGeneration() >= EVERGREEN); + return true; } bool hasBFM() const { @@ -295,42 +450,23 @@ } bool hasBCNT(unsigned Size) const { - if (Size == 32) - return (getGeneration() >= EVERGREEN); - - if (Size == 64) - return (getGeneration() >= SOUTHERN_ISLANDS); - - return false; - } - - bool hasMulU24() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasMulI24() const { - return (getGeneration() >= SOUTHERN_ISLANDS || - hasCaymanISA()); + return true; } bool hasFFBL() const { - return (getGeneration() >= EVERGREEN); + return true; } bool hasFFBH() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasMed3_16() const { - return getGeneration() >= GFX9; + return true; } - bool hasMin3Max3_16() const { - return getGeneration() >= GFX9; + virtual bool hasMed3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; } - bool hasMadMixInsts() const { - return HasMadMixInsts; + virtual bool hasMin3Max3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; } bool hasFmaMixInsts() const { @@ -338,15 +474,15 @@ } bool hasCARRY() const { - return (getGeneration() >= EVERGREEN); + return true; } - bool hasBORROW() const { - return (getGeneration() >= EVERGREEN); + virtual bool hasBORROW() const { + return true; } - bool hasCaymanISA() const { - return CaymanISA; + virtual bool hasCaymanISA() const { + return false; } bool hasFMA() const { @@ -361,10 +497,6 @@ return EnableHugePrivateBuffer; } - bool isPromoteAllocaEnabled() const { - return EnablePromoteAlloca; - } - bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; } @@ -378,20 +510,10 @@ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const; - /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if - /// the given LDS memory size is the only constraint. 
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; - - unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; - bool hasFP16Denormals() const { return FP64FP16Denormals; } - bool hasFP32Denormals() const { - return FP32Denormals; - } - bool hasFP64Denormals() const { return FP64FP16Denormals; } @@ -400,10 +522,6 @@ return getGeneration() >= AMDGPUSubtarget::GFX9; } - bool hasFPExceptions() const { - return FPExceptions; - } - bool enableDX10Clamp() const { return DX10Clamp; } @@ -445,7 +563,7 @@ } bool hasApertureRegs() const { - return HasApertureRegs; + return HasApertureRegs; } bool isTrapHandlerEnabled() const { @@ -511,14 +629,6 @@ return getGeneration() >= SEA_ISLANDS; } - bool hasFminFmaxLegacy() const { - return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; - } - - bool hasSDWA() const { - return HasSDWA; - } - bool hasSDWAOmod() const { return HasSDWAOmod; } @@ -557,10 +667,6 @@ return isAmdCodeObjectV2(F) ? 0 : 36; } - unsigned getAlignmentForImplicitArgPtr() const { - return isAmdHsaOS() ? 8 : 4; - } - /// \returns Number of bytes of arguments that are passed to a shader or /// kernel in addition to the explicit ones declared for the function. unsigned getImplicitArgNumBytes(const Function &F) const { @@ -589,134 +695,39 @@ return true; } - void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} - bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} + void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } + bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits()); - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(), - FlatWorkGroupSize); + return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(), + return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); } - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const { - return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits()); - } - /// \returns Maximum number of waves per execution unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerEU() const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits()); - } - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. 
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(), - FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits()); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerEU(); } /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(), - FlatWorkGroupSize); - } - - /// \returns Default range flat work group size for a calling convention. - std::pair getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; - - /// \returns Subtarget's default pair of minimum/maximum flat work group sizes - /// for function \p F, or minimum/maximum flat work group sizes explicitly - /// requested using "amdgpu-flat-work-group-size" attribute attached to - /// function \p F. - /// - /// \returns Subtarget's default values if explicitly requested values cannot - /// be converted to integer, or violate subtarget's specifications. - std::pair getFlatWorkGroupSizes(const Function &F) const; - - /// \returns Subtarget's default pair of minimum/maximum number of waves per - /// execution unit for function \p F, or minimum/maximum number of waves per - /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute - /// attached to function \p F. - /// - /// \returns Subtarget's default values if explicitly requested values cannot - /// be converted to integer, violate subtarget's specifications, or are not - /// compatible with minimum/maximum number of waves limited by flat work group - /// size, register usage, and/or lds usage. - std::pair getWavesPerEU(const Function &F) const; - - /// Creates value range metadata on an workitemid.* inrinsic call or load. - bool makeLIDRangeMetadata(Instruction *I) const; -}; - -class R600Subtarget final : public AMDGPUSubtarget { -private: - R600InstrInfo InstrInfo; - R600FrameLowering FrameLowering; - R600TargetLowering TLInfo; - -public: - R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const TargetMachine &TM); - - const R600InstrInfo *getInstrInfo() const override { - return &InstrInfo; - } - - const R600FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const R600TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const R600RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - bool hasCFAluBug() const { - return CFALUBug; - } - - bool hasVertexCache() const { - return HasVertexCache; - } - - short getTexVTXClauseSize() const { - return TexVTXClauseSize; + return AMDGPU::IsaInfo::getWavesPerWorkGroup( + MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); } }; @@ -767,6 +778,8 @@ const SIRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } + // static wrappers + static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); // XXX - Why is this here if it isn't in the default pass set? 
bool enableEarlyIfConversion() const override { @@ -776,7 +789,7 @@ void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; - bool isVGPRSpillingEnabled(const Function& F) const; + bool isVGPRSpillingEnabled(const Function &F) const; unsigned getMaxNumUserSGPRs() const { return 16; @@ -824,7 +837,7 @@ bool debuggerSupported() const { return debuggerInsertNops() && debuggerReserveRegs() && - debuggerEmitPrologue(); + debuggerEmitPrologue(); } bool debuggerInsertNops() const { @@ -866,16 +879,18 @@ unsigned getKernArgSegmentSize(const Function &F, unsigned ExplictArgBytes) const; - /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs + /// SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; - /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs + /// Return the maximum number of waves per SIMD for kernels using \p VGPRs + /// VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. bool flatScratchIsPointer() const { - return getGeneration() >= GFX9; + return getGeneration() >= AMDGPUSubtarget::GFX9; } /// \returns true if the machine has merged shaders in which s0-s7 are @@ -886,35 +901,39 @@ /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getSGPRAllocGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns SGPR encoding granularity supported by the subtarget. unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getSGPREncodingGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns Total number of SGPRs supported by the subtarget. unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); } /// \returns Addressable number of SGPRs supported by the subtarget. unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumSGPRs( + MCSubtargetInfo::getFeatureBits()); } /// \returns Minimum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU); + return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU, - Addressable); + return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU, Addressable); } /// \returns Reserved number of SGPRs for given function \p MF. @@ -932,34 +951,39 @@ /// \returns VGPR allocation granularity supported by the subtarget. 
unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getVGPRAllocGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getVGPREncodingGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); } /// \returns Addressable number of VGPRs supported by the subtarget. unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumVGPRs( + MCSubtargetInfo::getFeatureBits()); } /// \returns Minimum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU); + return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU); + return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Reserved number of VGPRs for given function \p MF. @@ -982,6 +1006,133 @@ const override; }; + +class R600Subtarget final : public R600GenSubtargetInfo, + public AMDGPUCommonSubtarget { +public: + enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 }; + +private: + R600InstrInfo InstrInfo; + R600FrameLowering FrameLowering; + R600TargetLowering TLInfo; + bool FMA; + bool CaymanISA; + bool CFALUBug; + bool DX10Clamp; + bool HasVertexCache; + bool R600ALUInst; + bool FP64; + short TexVTXClauseSize; + Generation Gen; + unsigned MaxPrivateElementSize; + int LDSBankCount; + InstrItineraryData InstrItins; + SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; + +public: + R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } + + const R600FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + const R600TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const R600RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + // Nothing implemented, just prevent crashes on use. 
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + Generation getGeneration() const { + return Gen; + } + + unsigned getStackAlignment() const { + return 4; + } + + R600Subtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS); + + bool hasBFE() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBFI() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBCNT(unsigned Size) const { + if (Size == 32) + return (getGeneration() >= EVERGREEN); + + return false; + } + + bool hasBORROW() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCARRY() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCaymanISA() const { + return CaymanISA; + } + + bool hasFFBL() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFFBH() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFMA() const { return FMA; } + + bool hasMed3_16() const { return false; } + + bool hasMin3Max3_16() const { return false; } + + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { + return 36; + } + + bool hasCFAluBug() const { return CFALUBug; } + + bool hasVertexCache() const { return HasVertexCache; } + + short getTexVTXClauseSize() const { return TexVTXClauseSize; } + + AMDGPUAS getAMDGPUAS() const { return AS; } + + bool enableMachineScheduler() const override { + return true; + } + + bool enableSubRegLiveness() const override { + return true; + } +}; + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -23,6 +23,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/IR/MDBuilder.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include @@ -34,9 +35,39 @@ #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "R600GenSubtargetInfo.inc" AMDGPUSubtarget::~AMDGPUSubtarget() = default; +R600Subtarget & +R600Subtarget::initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS) { + SmallString<256> FullFS("+promote-alloca,+dx10-clamp,"); + FullFS += FS; + ParseSubtargetFeatures(GPU, FullFS); + + // FIXME: I don't think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere + // if someone tries to enable these? + if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) { + FP32Denormals = false; + } + + // Set defaults if needed.
+ if (MaxPrivateElementSize == 0) + MaxPrivateElementSize = 4; + + if (LDSBankCount == 0) + LDSBankCount = 32; + + HasMulU24 = getGeneration() >= EVERGREEN; + HasMulI24 = hasCaymanISA(); + + return *this; +} + AMDGPUSubtarget & AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS) { @@ -93,26 +124,44 @@ HasMovrel = true; } + HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + return *this; } +AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT, + const FeatureBitset &FeatureBits) : + TargetTriple(TT), + SubtargetFeatureBits(FeatureBits), + Has16BitInsts(false), + HasMadMixInsts(false), + FP32Denormals(false), + FPExceptions(false), + HasSDWA(false), + HasVOP3PInsts(false), + HasMulI24(true), + HasMulU24(true), + HasFminFmaxLegacy(true), + EnablePromoteAlloca(false), + LocalMemorySize(0), + WavefrontSize(0) + { } + AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) - : AMDGPUGenSubtargetInfo(TT, GPU, FS), + const TargetMachine &TM) : + AMDGPUGenSubtargetInfo(TT, GPU, FS), + AMDGPUCommonSubtarget(TT, getFeatureBits()), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), TargetTriple(TT), - Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), + Gen(SOUTHERN_ISLANDS), IsaVersion(ISAVersion0_0_0), - WavefrontSize(0), - LocalMemorySize(0), LDSBankCount(0), MaxPrivateElementSize(0), FastFMAF32(false), HalfRate64Ops(false), - FP32Denormals(false), FP64FP16Denormals(false), - FPExceptions(false), DX10Clamp(false), FlatForGlobal(false), AutoWaitcntBeforeBarrier(false), @@ -129,7 +178,6 @@ EnableHugePrivateBuffer(false), EnableVGPRSpilling(false), - EnablePromoteAlloca(false), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), EnableSIScheduler(false), @@ -137,25 +185,18 @@ DumpCode(false), FP64(false), - FMA(false), - MIMG_R128(false), - IsGCN(false), GCN3Encoding(false), CIInsts(false), GFX9Insts(false), SGPRInitBug(false), HasSMemRealTime(false), - Has16BitInsts(false), HasIntClamp(false), - HasVOP3PInsts(false), - HasMadMixInsts(false), HasFmaMixInsts(false), HasMovrel(false), HasVGPRIndexMode(false), HasScalarStores(false), HasScalarAtomics(false), HasInv2PiInlineImm(false), - HasSDWA(false), HasSDWAOmod(false), HasSDWAScalar(false), HasSDWASdst(false), @@ -171,20 +212,14 @@ AddNoCarryInsts(false), HasUnpackedD16VMem(false), - R600ALUInst(false), - CaymanISA(false), - CFALUBug(false), - HasVertexCache(false), - TexVTXClauseSize(0), ScalarizeGlobal(false), - FeatureDisable(false), - InstrItins(getInstrItineraryForCPU(GPU)) { + FeatureDisable(false) { AS = AMDGPU::getAMDGPUAS(TT); initializeSubtargetDependencies(TT, GPU, FS); } -unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, +unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, const Function &F) const { if (NWaves == 1) return getLocalMemorySize(); @@ -194,7 +229,7 @@ return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves; } -unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, +unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &F) const { unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second; unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); @@ -207,13 +242,13 @@ } unsigned -AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const { +AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction 
&MF) const { const auto *MFI = MF.getInfo(); return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction()); } std::pair -AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { +AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { switch (CC) { case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_KERNEL: @@ -231,7 +266,7 @@ } } -std::pair AMDGPUSubtarget::getFlatWorkGroupSizes( +std::pair AMDGPUCommonSubtarget::getFlatWorkGroupSizes( const Function &F) const { // FIXME: 1024 if function. // Default minimum/maximum flat work group sizes. @@ -261,7 +296,7 @@ return Requested; } -std::pair AMDGPUSubtarget::getWavesPerEU( +std::pair AMDGPUCommonSubtarget::getWavesPerEU( const Function &F) const { // Default minimum/maximum number of waves per execution unit. std::pair Default(1, getMaxWavesPerEU()); @@ -309,7 +344,7 @@ return Requested; } -bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { +bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const { Function *Kernel = I->getParent()->getParent(); unsigned MinSize = 0; unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second; @@ -373,10 +408,14 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : - AMDGPUSubtarget(TT, GPU, FS, TM), + R600GenSubtargetInfo(TT, GPU, FS), + AMDGPUCommonSubtarget(TT, getFeatureBits()), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - TLInfo(TM, *this) {} + TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), + DX10Clamp(false), + InstrItins(getInstrItineraryForCPU(GPU)), + AS (AMDGPU::getAMDGPUAS(TT)) { } SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM) @@ -624,3 +663,17 @@ std::vector> &Mutations) const { Mutations.push_back(llvm::make_unique(&InstrInfo)); } + +const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) { + if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn) + return static_cast(MF.getSubtarget()); + else + return static_cast(MF.getSubtarget()); +} + +const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) { + if (TM.getTargetTriple().getArch() == Triple::amdgcn) + return static_cast(TM.getSubtarget(F)); + else + return static_cast(TM.getSubtarget(F)); +} Index: lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -34,7 +34,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { protected: std::unique_ptr TLOF; - AMDGPUIntrinsicInfo IntrinsicInfo; AMDGPUAS AS; StringRef getGPUName(const Function &F) const; @@ -49,12 +48,8 @@ CodeGenOpt::Level OL); ~AMDGPUTargetMachine() override; - const AMDGPUSubtarget *getSubtargetImpl() const; - const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0; - - const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { - return &IntrinsicInfo; - } + const TargetSubtargetInfo *getSubtargetImpl() const; + const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); @@ -103,6 +98,7 @@ class GCNTargetMachine final : public AMDGPUTargetMachine { private: + AMDGPUIntrinsicInfo IntrinsicInfo; mutable StringMap> SubtargetMap; public: @@ -117,6 +113,10 @@ TargetTransformInfo 
getTargetTransformInfo(const Function &F) override; + const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { + return &IntrinsicInfo; + } + bool useIPRA() const override { return true; } Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -45,17 +45,12 @@ friend BaseT; - const AMDGPUSubtarget *ST; - const AMDGPUTargetLowering *TLI; + Triple TargetTriple; public: explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), - ST(TM->getSubtargetImpl(F)), - TLI(ST->getTargetLowering()) {} - - const AMDGPUSubtarget *getST() const { return ST; } - const AMDGPUTargetLowering *getTLI() const { return TLI; } + TargetTriple(TM->getTargetTriple()) {} void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); @@ -124,7 +119,7 @@ public: explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), - ST(TM->getSubtargetImpl(F)), + ST(static_cast(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), CommonTTI(TM, F), IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {} @@ -212,18 +207,18 @@ friend BaseT; - const AMDGPUSubtarget *ST; + const R600Subtarget *ST; const AMDGPUTargetLowering *TLI; AMDGPUTTIImpl CommonTTI; public: explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), - ST(TM->getSubtargetImpl(F)), + ST(static_cast(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} - const AMDGPUSubtarget *getST() const { return ST; } + const R600Subtarget *getST() const { return ST; } const AMDGPUTargetLowering *getTLI() const { return TLI; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -102,7 +102,7 @@ unsigned ThresholdPrivate = UnrollThresholdPrivate; unsigned ThresholdLocal = UnrollThresholdLocal; unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal); - AMDGPUAS ASST = ST->getAMDGPUAS(); + const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple); for (const BasicBlock *BB : L->getBlocks()) { const DataLayout &DL = BB->getModule()->getDataLayout(); unsigned LocalGEPsSeen = 0; Index: lib/Target/AMDGPU/AMDILCFGStructurizer.cpp =================================================================== --- lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -432,19 +432,19 @@ for (;; --I) { if (I == MBB.end()) continue; - if (I->getOpcode() == AMDGPU::PRED_X) { + if (I->getOpcode() == R600::PRED_X) { switch (I->getOperand(2).getImm()) { - case AMDGPU::PRED_SETE_INT: - I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT); + case R600::PRED_SETE_INT: + I->getOperand(2).setImm(R600::PRED_SETNE_INT); return; - case AMDGPU::PRED_SETNE_INT: - I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT); + case R600::PRED_SETNE_INT: + I->getOperand(2).setImm(R600::PRED_SETE_INT); return; - case AMDGPU::PRED_SETE: - I->getOperand(2).setImm(AMDGPU::PRED_SETNE); + case R600::PRED_SETE: + I->getOperand(2).setImm(R600::PRED_SETNE); return; - case AMDGPU::PRED_SETNE: - I->getOperand(2).setImm(AMDGPU::PRED_SETE); + case R600::PRED_SETNE: + 
I->getOperand(2).setImm(R600::PRED_SETE); return; default: llvm_unreachable("PRED_X Opcode invalid!"); @@ -513,10 +513,10 @@ int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; + case R600::JUMP_COND: + case R600::JUMP: return R600::IF_PREDICATE_SET; + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; default: llvm_unreachable("internal error"); } return -1; @@ -524,10 +524,10 @@ int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; + case R600::JUMP_COND: + case R600::JUMP: return R600::IF_PREDICATE_SET; + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; default: llvm_unreachable("internal error"); } return -1; @@ -535,8 +535,8 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; + case R600::JUMP_COND: + case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; default: llvm_unreachable("internal error"); } return -1; @@ -544,8 +544,8 @@ int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; + case R600::JUMP_COND: + case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; default: llvm_unreachable("internal error"); } return -1; @@ -573,9 +573,9 @@ bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { switch (MI->getOpcode()) { - case AMDGPU::JUMP_COND: - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return true; + case R600::JUMP_COND: + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return true; default: return false; } @@ -584,8 +584,8 @@ bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { switch (MI->getOpcode()) { - case AMDGPU::JUMP: - case AMDGPU::BRANCH: + case R600::JUMP: + case R600::BRANCH: return true; default: return false; @@ -634,7 +634,7 @@ MachineBasicBlock::reverse_iterator It = MBB->rbegin(); if (It != MBB->rend()) { MachineInstr *instr = &(*It); - if (instr->getOpcode() == AMDGPU::RETURN) + if (instr->getOpcode() == R600::RETURN) return instr; } return nullptr; @@ -687,8 +687,8 @@ MachineBasicBlock::iterator E = MBB->end(); MachineBasicBlock::iterator It = Pre; while (It != E) { - if (Pre->getOpcode() == AMDGPU::CONTINUE - && It->getOpcode() == AMDGPU::ENDLOOP) + if (Pre->getOpcode() == R600::CONTINUE + && It->getOpcode() == R600::ENDLOOP) ContInstr.push_back(&*Pre); Pre = It; ++It; @@ -1303,15 +1303,15 @@ bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); - //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" - MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); + //insert R600::ENDIF to avoid special case "input landBlk == NULL" + MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); if (LandBlkHasOtherPred) { report_fatal_error("Extra register needed to handle CFG"); unsigned CmpResReg = HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); report_fatal_error("Extra compare instruction needed to handle CFG"); - 
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, + insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, CmpResReg, DebugLoc()); } @@ -1319,7 +1319,7 @@ // cause an assertion failure in the PostRA scheduling pass. unsigned InitReg = HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); - insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, + insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, DebugLoc()); if (MigrateTrue) { @@ -1329,7 +1329,7 @@ // (initVal != 1). report_fatal_error("Extra register needed to handle CFG"); } - insertInstrBefore(I, AMDGPU::ELSE); + insertInstrBefore(I, R600::ELSE); if (MigrateFalse) { migrateInstruction(FalseMBB, LandBlk, I); @@ -1341,7 +1341,7 @@ if (LandBlkHasOtherPred) { // add endif - insertInstrBefore(I, AMDGPU::ENDIF); + insertInstrBefore(I, R600::ENDIF); // put initReg = 2 to other predecessors of landBlk for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), @@ -1414,7 +1414,7 @@ } if (FalseMBB) { - insertInstrBefore(I, AMDGPU::ELSE); + insertInstrBefore(I, R600::ELSE); MBB->splice(I, FalseMBB, FalseMBB->begin(), FalseMBB->end()); MBB->removeSuccessor(FalseMBB, true); @@ -1423,7 +1423,7 @@ retireBlock(FalseMBB); MLI->removeBlock(FalseMBB); } - insertInstrBefore(I, AMDGPU::ENDIF); + insertInstrBefore(I, R600::ENDIF); BranchMI->eraseFromParent(); @@ -1436,8 +1436,8 @@ LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() << " land = BB" << LandMBB->getNumber() << "\n";); - insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); - insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); + insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); + insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); DstBlk->replaceSuccessor(DstBlk, LandMBB); } @@ -1453,9 +1453,9 @@ MachineBasicBlock::iterator I = BranchMI; if (TrueBranch != LandMBB) reversePredicateSetter(I, *I->getParent()); - insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); - insertInstrBefore(I, AMDGPU::BREAK); - insertInstrBefore(I, AMDGPU::ENDIF); + insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); + insertInstrBefore(I, R600::BREAK); + insertInstrBefore(I, R600::ENDIF); //now branchInst can be erase safely BranchMI->eraseFromParent(); //now take care of successors, retire blocks @@ -1484,8 +1484,8 @@ getBranchZeroOpcode(OldOpcode); insertCondBranchBefore(I, BranchOpcode, DL); // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); - insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); + insertInstrEnd(ContingMBB, R600::CONTINUE, DL); + insertInstrEnd(ContingMBB, R600::ENDIF, DL); } else { int BranchOpcode = TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : @@ -1500,7 +1500,7 @@ // location we've just inserted that reference here so it should be // representative insertEnd to ensure phi-moves, if exist, go before the // continue-instr. 
- insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, + insertInstrEnd(ContingMBB, R600::CONTINUE, getLastDebugLocInBB(ContingMBB)); } } @@ -1627,7 +1627,7 @@ SmallVectorImpl &RetMBB) { MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); FuncRep->push_back(DummyExitBlk); //insert to function - insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); + insertInstrEnd(DummyExitBlk, R600::RETURN); for (SmallVectorImpl::iterator It = RetMBB.begin(), E = RetMBB.end(); It != E; ++It) { Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -4,7 +4,6 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer) tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info) tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) @@ -18,6 +17,17 @@ set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td) tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel) +set(LLVM_TARGET_DEFINITIONS R600.td) +tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM R600GenCallingConv.inc -gen-callingconv) +tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer) +tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM R600GenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget) + add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,6 +20,7 @@ #include "Disassembler/AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" Index: lib/Target/AMDGPU/EvergreenInstructions.td =================================================================== --- lib/Target/AMDGPU/EvergreenInstructions.td +++ lib/Target/AMDGPU/EvergreenInstructions.td @@ -14,14 +14,13 @@ //===----------------------------------------------------------------------===// def isEG : Predicate< - "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && " - "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && " + "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && " "!Subtarget->hasCaymanISA()" >; def isEGorCayman : Predicate< - "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||" - "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS" + "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||" + "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS" >; class EGPat : AMDGPUPat { Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -216,13 +216,16 @@ raw_ostream &O); }; -// FIXME: R600 specific parts of AMDGPUInstrPrinter should be 
moved here, and -// MCTargetDesc should be using R600InstPrinter for the R600 target. -class R600InstPrinter : public AMDGPUInstPrinter { +class R600InstPrinter : public MCInstPrinter { public: R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : AMDGPUInstPrinter(MAI, MII, MRI) {} + : MCInstPrinter(MAI, MII, MRI) {} + + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -505,11 +505,6 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) { - static_cast(this)->printOperand(MI, OpNo, O); - return; - } - if (OpNo >= MI->getNumOperands()) { O << "/*Missing OP" << OpNo << "*/"; return; @@ -960,11 +955,6 @@ void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) { - static_cast(this)->printMemOperand(MI, OpNo, O); - return; - } - printOperand(MI, OpNo, STI, O); O << ", "; printOperand(MI, OpNo + 1, STI, O); @@ -990,16 +980,6 @@ O << Asm; } -void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printAbs(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printClamp(MI, OpNo, O); -} - void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1026,70 +1006,6 @@ O << " div:2"; } -void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printLiteral(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printLast(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printNeg(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printOMOD(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printRel(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printUpdateExecMask(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printUpdatePred(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printWrite(MI, OpNo, O); -} - -void 
AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printBankSwizzle(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printRSel(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printCT(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printKCache(MI, OpNo, O); -} - void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1294,6 +1210,13 @@ #include "AMDGPUGenAsmWriter.inc" +void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot, const MCSubtargetInfo &STI) { + O.flush(); + printInstruction(MI, O); + printAnnotation(O, Annot); +} + void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O) { AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|'); @@ -1412,7 +1335,7 @@ if (Op.isReg()) { switch (Op.getReg()) { // This is the default predicate state, so we don't need to print it. - case AMDGPU::PRED_SEL_OFF: + case R600::PRED_SEL_OFF: break; default: @@ -1488,3 +1411,5 @@ O << " (MASKED)"; } } + +#include "R600GenAsmWriter.inc" Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -40,6 +40,7 @@ MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); +MCInstrInfo *createR600MCInstrInfo(); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, @@ -59,6 +60,10 @@ #include "AMDGPUGenRegisterInfo.inc" #undef GET_REGINFO_ENUM +#define GET_REGINFO_ENUM +#include "R600GenRegisterInfo.inc" +#undef GET_REGINFO_ENUM + #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM #define GET_INSTRINFO_SCHED_ENUM @@ -67,9 +72,20 @@ #undef GET_INSTRINFO_OPERAND_ENUM #undef GET_INSTRINFO_ENUM +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM +#define GET_INSTRINFO_SCHED_ENUM +#include "R600GenInstrInfo.inc" +#undef GET_INSTRINFO_SCHED_ENUM +#undef GET_INSTRINFO_OPERAND_ENUM +#undef GET_INSTRINFO_ENUM #define GET_SUBTARGETINFO_ENUM #include "AMDGPUGenSubtargetInfo.inc" #undef GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_ENUM +#include "R600GenSubtargetInfo.inc" +#undef GET_SUBTARGETINFO_ENUM + #endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -38,9 +38,17 @@ #define GET_SUBTARGETINFO_MC_DESC #include "AMDGPUGenSubtargetInfo.inc" +#define NoSchedModel NoSchedModelR600 +#define GET_SUBTARGETINFO_MC_DESC +#include "R600GenSubtargetInfo.inc" +#undef NoSchedModelR600 + #define GET_REGINFO_MC_DESC #include "AMDGPUGenRegisterInfo.inc" +#define GET_REGINFO_MC_DESC +#include "R600GenRegisterInfo.inc" + static MCInstrInfo *createAMDGPUMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitAMDGPUMCInstrInfo(X); @@ -49,12 +57,17 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new 
MCRegisterInfo(); - InitAMDGPUMCRegisterInfo(X, 0); + if (TT.getArch() == Triple::r600) + InitR600MCRegisterInfo(X, 0); + else + InitAMDGPUMCRegisterInfo(X, 0); return X; } static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + if (TT.getArch() == Triple::r600) + return createR600MCSubtargetInfoImpl(TT, CPU, FS); return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS); } @@ -63,8 +76,10 @@ const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - return T.getArch() == Triple::r600 ? new R600InstPrinter(MAI, MII, MRI) : - new AMDGPUInstPrinter(MAI, MII, MRI); + if (T.getArch() == Triple::r600) + return new R600InstPrinter(MAI, MII, MRI); + else + return new AMDGPUInstPrinter(MAI, MII, MRI); } static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S, @@ -90,10 +105,12 @@ } extern "C" void LLVMInitializeAMDGPUTargetMC() { + + TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo); for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) { RegisterMCAsmInfo X(*T); - TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo); TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo); TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo); TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter); Index: lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,6 @@ AMDGPUMCTargetDesc.cpp AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp + R600MCTargetDesc.cpp SIMCCodeEmitter.cpp ) Index: lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUFixupKinds.h" -#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" @@ -36,30 +35,40 @@ namespace { -class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { +class R600MCCodeEmitter : public MCCodeEmitter { const MCRegisterInfo &MRI; + const MCInstrInfo &MCII; public: R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri) - : AMDGPUMCCodeEmitter(mcii), MRI(mri) {} + : MRI(mri), MCII(mcii) {} R600MCCodeEmitter(const R600MCCodeEmitter &) = delete; R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete; /// Encode the instruction and write it to the OS. void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; + const MCSubtargetInfo &STI) const; /// \returns the encoding for an MCOperand. 
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; + const MCSubtargetInfo &STI) const; private: + void Emit(uint32_t value, raw_ostream &OS) const; void Emit(uint64_t value, raw_ostream &OS) const; unsigned getHWReg(unsigned regNo) const; + + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; + void verifyInstructionPredicates(const MCInst &MI, + uint64_t AvailableFeatures) const; + }; } // end anonymous namespace @@ -94,16 +103,16 @@ computeAvailableFeatures(STI.getFeatureBits())); const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); - if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::FETCH_CLAUSE || - MI.getOpcode() == AMDGPU::ALU_CLAUSE || - MI.getOpcode() == AMDGPU::BUNDLE || - MI.getOpcode() == AMDGPU::KILL) { + if (MI.getOpcode() == R600::RETURN || + MI.getOpcode() == R600::FETCH_CLAUSE || + MI.getOpcode() == R600::ALU_CLAUSE || + MI.getOpcode() == R600::BUNDLE || + MI.getOpcode() == R600::KILL) { return; } else if (IS_VTX(Desc)) { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) { + if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) { InstWord2 |= 1 << 19; // Mega-Fetch bit } @@ -136,7 +145,7 @@ Emit((uint32_t) 0, OS); } else { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI); - if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) && + if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) && ((Desc.TSFlags & R600_InstFlag::OP1) || Desc.TSFlags & R600_InstFlag::OP2)) { uint64_t ISAOpCode = Inst & (0x3FFULL << 39); @@ -186,4 +195,4 @@ } #define ENABLE_INSTR_PREDICATE_VERIFIER -#include "AMDGPUGenMCCodeEmitter.inc" +#include "R600GenMCCodeEmitter.inc" Index: lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp @@ -0,0 +1,27 @@ +//===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This file provides R600 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "R600GenInstrInfo.inc" + +MCInstrInfo *llvm::createR600MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitR600MCInstrInfo(X); + return X; +} Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -438,3 +438,6 @@ llvm_unreachable("Encoding of this operand type is not supported yet."); return 0; } + +#define ENABLE_INSTR_PREDICATE_VERIFIER +#include "AMDGPUGenMCCodeEmitter.inc" Index: lib/Target/AMDGPU/R600.td =================================================================== --- /dev/null +++ lib/Target/AMDGPU/R600.td @@ -0,0 +1,51 @@ + +include "llvm/Target/Target.td" + +def R600InstrInfo : InstrInfo { + let guessInstructionProperties = 1; + let noNamedPositionallyEncodedOperands = 1; +} + +def R600 : Target { + let InstructionSet = R600InstrInfo; + let AllowRegisterRenaming = 1; +} + +let Namespace = "R600" in { + +foreach Index = 0-15 in { + def sub#Index : SubRegIndex<32, !shl(Index, 5)>; +} + +include "R600RegisterInfo.td" + +} + +def NullALU : InstrItinClass; +def ALU_NULL : FuncUnit; + +include "AMDGPUFeatures.td" +include "R600Schedule.td" +include "R600Processors.td" +include "AMDGPUInstrInfo.td" +include "AMDGPUInstructions.td" +include "R600Instructions.td" +include "R700Instructions.td" +include "EvergreenInstructions.td" +include "CaymanInstructions.td" + +// Calling convention for R600 +def CC_R600 : CallingConv<[ + CCIfInReg>> +]>; + +// Calling convention for compute kernels +def CC_R600_Kernel : CallingConv<[ + CCCustom<"allocateKernArg"> +]>; Index: lib/Target/AMDGPU/R600AsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/R600AsmPrinter.cpp +++ lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -51,7 +51,7 @@ for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { - if (MI.getOpcode() == AMDGPU::KILLGT) + if (MI.getOpcode() == R600::KILLGT) killPixel = true; unsigned numOperands = MI.getNumOperands(); for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { Index: lib/Target/AMDGPU/R600ClauseMergePass.cpp =================================================================== --- lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -34,8 +34,8 @@ static bool isCFAlu(const MachineInstr &MI) { switch (MI.getOpcode()) { - case AMDGPU::CF_ALU: - case AMDGPU::CF_ALU_PUSH_BEFORE: + case R600::CF_ALU: + case R600::CF_ALU_PUSH_BEFORE: return true; default: return false; @@ -85,20 +85,20 @@ unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const { assert(isCFAlu(MI)); return MI - .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT)) + .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT)) .getImm(); } bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const { assert(isCFAlu(MI)); return MI - .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled)) + .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled)) .getImm(); } void R600ClauseMergePass::cleanPotentialDisabledCFAlu( MachineInstr &CFAlu) const { - int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); + int 
CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT); MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end(); I++; do { @@ -117,7 +117,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, const MachineInstr &LatrCFAlu) const { assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); - int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); + int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT); unsigned RootInstCount = getCFAluSize(RootCFAlu), LaterInstCount = getCFAluSize(LatrCFAlu); unsigned CumuledInsts = RootInstCount + LaterInstCount; @@ -125,15 +125,15 @@ LLVM_DEBUG(dbgs() << "Excess inst counts\n"); return false; } - if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE) return false; // Is KCache Bank 0 compatible ? int Mode0Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0); int KBank0Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0); int KBank0LineIdx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0); if (LatrCFAlu.getOperand(Mode0Idx).getImm() && RootCFAlu.getOperand(Mode0Idx).getImm() && (LatrCFAlu.getOperand(KBank0Idx).getImm() != @@ -145,11 +145,11 @@ } // Is KCache Bank 1 compatible ? int Mode1Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1); int KBank1Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1); int KBank1LineIdx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1); if (LatrCFAlu.getOperand(Mode1Idx).getImm() && RootCFAlu.getOperand(Mode1Idx).getImm() && (LatrCFAlu.getOperand(KBank1Idx).getImm() != Index: lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp =================================================================== --- lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -94,7 +94,7 @@ } bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { - if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && + if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && getLoopDepth() > 1) return true; @@ -103,10 +103,10 @@ switch(Opcode) { default: return false; - case AMDGPU::CF_ALU_PUSH_BEFORE: - case AMDGPU::CF_ALU_ELSE_AFTER: - case AMDGPU::CF_ALU_BREAK: - case AMDGPU::CF_ALU_CONTINUE: + case R600::CF_ALU_PUSH_BEFORE: + case R600::CF_ALU_ELSE_AFTER: + case R600::CF_ALU_BREAK: + case R600::CF_ALU_CONTINUE: if (CurrentSubEntries == 0) return false; if (ST->getWavefrontSize() == 64) { @@ -168,8 +168,8 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) { CFStack::StackItem Item = CFStack::ENTRY; switch(Opcode) { - case AMDGPU::CF_PUSH_EG: - case AMDGPU::CF_ALU_PUSH_BEFORE: + case R600::CF_PUSH_EG: + case R600::CF_ALU_PUSH_BEFORE: if (!isWQM) { if (!ST->hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) @@ -240,8 +240,8 @@ bool IsTrivialInst(MachineInstr &MI) const { switch (MI.getOpcode()) { - case AMDGPU::KILL: - case AMDGPU::RETURN: + case R600::KILL: + case R600::RETURN: return true; default: return false; @@ -253,41 +253,41 @@ bool isEg = (ST->getGeneration() 
>= R600Subtarget::EVERGREEN); switch (CFI) { case CF_TC: - Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; + Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; break; case CF_VC: - Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; + Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; break; case CF_CALL_FS: - Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; + Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; break; case CF_WHILE_LOOP: - Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; + Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; break; case CF_END_LOOP: - Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; + Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; break; case CF_LOOP_BREAK: - Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; + Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; break; case CF_LOOP_CONTINUE: - Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; + Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; break; case CF_JUMP: - Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; + Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; break; case CF_ELSE: - Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; + Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; break; case CF_POP: - Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; + Opcode = isEg ? R600::POP_EG : R600::POP_R600; break; case CF_END: if (ST->hasCaymanISA()) { - Opcode = AMDGPU::CF_END_CM; + Opcode = R600::CF_END_CM; break; } - Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; + Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; break; } assert (Opcode && "No opcode selected"); @@ -305,21 +305,21 @@ continue; if (MO.isDef()) { unsigned Reg = MO.getReg(); - if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + if (R600::R600_Reg128RegClass.contains(Reg)) DstMI = Reg; else DstMI = TRI->getMatchingSuperReg(Reg, AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), - &AMDGPU::R600_Reg128RegClass); + &R600::R600_Reg128RegClass); } if (MO.isUse()) { unsigned Reg = MO.getReg(); - if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + if (R600::R600_Reg128RegClass.contains(Reg)) SrcMI = Reg; else SrcMI = TRI->getMatchingSuperReg(Reg, AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), - &AMDGPU::R600_Reg128RegClass); + &R600::R600_Reg128RegClass); } } if ((DstRegs.find(SrcMI) == DstRegs.end())) { @@ -359,15 +359,15 @@ void getLiteral(MachineInstr &MI, std::vector &Lits) const { static const unsigned LiteralRegs[] = { - AMDGPU::ALU_LITERAL_X, - AMDGPU::ALU_LITERAL_Y, - AMDGPU::ALU_LITERAL_Z, - AMDGPU::ALU_LITERAL_W + R600::ALU_LITERAL_X, + R600::ALU_LITERAL_Y, + R600::ALU_LITERAL_Z, + R600::ALU_LITERAL_W }; const SmallVector, 3> Srcs = TII->getSrcs(MI); for (const auto &Src:Srcs) { - if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X) + if (Src.first->getReg() != R600::ALU_LITERAL_X) continue; int64_t Imm = Src.second; std::vector::iterator It = @@ -377,7 +377,7 @@ // Get corresponding Operand MachineOperand &Operand = MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); + TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); if (It != Lits.end()) { // Reuse existing literal reg @@ -400,7 +400,7 @@ unsigned LiteralPair0 = Literals[i]; unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), - TII->get(AMDGPU::LITERALS)) + TII->get(R600::LITERALS)) 
.addImm(LiteralPair0) .addImm(LiteralPair1); } @@ -442,7 +442,7 @@ } for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), - TII->get(AMDGPU::LITERALS)); + TII->get(R600::LITERALS)); if (Literals[i]->isImm()) { MILit.addImm(Literals[i]->getImm()); } else { @@ -471,7 +471,7 @@ unsigned &CfCount) { CounterPropagateAddr(*Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); - BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount); + BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { BB->splice(InsertPos, BB, Clause.second[i]); } @@ -483,7 +483,7 @@ Clause.first->getOperand(0).setImm(0); CounterPropagateAddr(*Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); - BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount); + BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { BB->splice(InsertPos, BB, Clause.second[i]); } @@ -540,34 +540,34 @@ } MachineBasicBlock::iterator MI = I; - if (MI->getOpcode() != AMDGPU::ENDIF) + if (MI->getOpcode() != R600::ENDIF) LastAlu.back() = nullptr; - if (MI->getOpcode() == AMDGPU::CF_ALU) + if (MI->getOpcode() == R600::CF_ALU) LastAlu.back() = &*MI; I++; bool RequiresWorkAround = CFStack.requiresWorkAroundForInst(MI->getOpcode()); switch (MI->getOpcode()) { - case AMDGPU::CF_ALU_PUSH_BEFORE: + case R600::CF_ALU_PUSH_BEFORE: if (RequiresWorkAround) { LLVM_DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) .addImm(CfCount + 1) .addImm(1); - MI->setDesc(TII->get(AMDGPU::CF_ALU)); + MI->setDesc(TII->get(R600::CF_ALU)); CfCount++; - CFStack.pushBranch(AMDGPU::CF_PUSH_EG); + CFStack.pushBranch(R600::CF_PUSH_EG); } else - CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); + CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); LLVM_FALLTHROUGH; - case AMDGPU::CF_ALU: + case R600::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; - case AMDGPU::WHILELOOP: { + case R600::WHILELOOP: { CFStack.pushLoop(); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_WHILE_LOOP)) @@ -580,7 +580,7 @@ CfCount++; break; } - case AMDGPU::ENDLOOP: { + case R600::ENDLOOP: { CFStack.popLoop(); std::pair> Pair = std::move(LoopStack.back()); @@ -592,7 +592,7 @@ CfCount++; break; } - case AMDGPU::IF_PREDICATE_SET: { + case R600::IF_PREDICATE_SET: { LastAlu.push_back(nullptr); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) @@ -604,7 +604,7 @@ CfCount++; break; } - case AMDGPU::ELSE: { + case R600::ELSE: { MachineInstr * JumpInst = IfThenElseStack.back(); IfThenElseStack.pop_back(); CounterPropagateAddr(*JumpInst, CfCount); @@ -618,7 +618,7 @@ CfCount++; break; } - case AMDGPU::ENDIF: { + case R600::ENDIF: { CFStack.popBranch(); if (LastAlu.back()) { ToPopAfter.push_back(LastAlu.back()); @@ -640,7 +640,7 @@ MI->eraseFromParent(); break; } - case AMDGPU::BREAK: { + case R600::BREAK: { CfCount ++; MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_LOOP_BREAK)) @@ -649,7 +649,7 @@ MI->eraseFromParent(); break; } - case AMDGPU::CONTINUE: { + case R600::CONTINUE: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 
getHWInstrDesc(CF_LOOP_CONTINUE)) .addImm(0); @@ -658,12 +658,12 @@ CfCount++; break; } - case AMDGPU::RETURN: { + case R600::RETURN: { DebugLoc DL = MBB.findDebugLoc(MI); BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); CfCount++; if (CfCount % 2) { - BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD)); + BuildMI(MBB, I, DL, TII->get(R600::PAD)); CfCount++; } MI->eraseFromParent(); @@ -684,7 +684,7 @@ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { MachineInstr *Alu = ToPopAfter[i]; BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), - TII->get(AMDGPU::CF_ALU_POP_AFTER)) + TII->get(R600::CF_ALU_POP_AFTER)) .addImm(Alu->getOperand(0).getImm()) .addImm(Alu->getOperand(1).getImm()) .addImm(Alu->getOperand(2).getImm()) Index: lib/Target/AMDGPU/R600EmitClauseMarkers.cpp =================================================================== --- lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -52,12 +52,12 @@ unsigned OccupiedDwords(MachineInstr &MI) const { switch (MI.getOpcode()) { - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::DOT_4: return 4; - case AMDGPU::KILL: + case R600::KILL: return 0; default: break; @@ -77,7 +77,7 @@ E = MI.operands_end(); It != E; ++It) { MachineOperand &MO = *It; - if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) ++NumLiteral; } return 1 + NumLiteral; @@ -89,12 +89,12 @@ if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode())) return true; switch (MI.getOpcode()) { - case AMDGPU::PRED_X: - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::COPY: - case AMDGPU::DOT_4: + case R600::PRED_X: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::COPY: + case R600::DOT_4: return true; default: return false; @@ -103,9 +103,9 @@ bool IsTrivialInst(MachineInstr &MI) const { switch (MI.getOpcode()) { - case AMDGPU::KILL: - case AMDGPU::RETURN: - case AMDGPU::IMPLICIT_DEF: + case R600::KILL: + case R600::RETURN: + case R600::IMPLICIT_DEF: return true; default: return false; @@ -132,16 +132,16 @@ bool UpdateInstr = true) const { std::vector> UsedKCache; - if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4) + if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4) return true; const SmallVectorImpl> &Consts = TII->getSrcs(MI); assert( - (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) && + (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) && "Can't assign Const"); for (unsigned i = 0, n = Consts.size(); i < n; ++i) { - if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) + if (Consts[i].first->getReg() != R600::ALU_CONST) continue; unsigned Sel = Consts[i].second; unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; @@ -172,16 +172,16 @@ return true; for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { - if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) + if (Consts[i].first->getReg() != R600::ALU_CONST) continue; switch(UsedKCache[j].first) { case 0: Consts[i].first->setReg( - AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); + R600::R600_KC0RegClass.getRegister(UsedKCache[j].second)); break; case 1: Consts[i].first->setReg( - AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); + 
R600::R600_KC1RegClass.getRegister(UsedKCache[j].second)); break; default: llvm_unreachable("Wrong Cache Line"); @@ -253,7 +253,7 @@ break; if (AluInstCount > TII->getMaxAlusPerClause()) break; - if (I->getOpcode() == AMDGPU::PRED_X) { + if (I->getOpcode() == R600::PRED_X) { // We put PRED_X in its own clause to ensure that ifcvt won't create // clauses with more than 128 insts. // IfCvt is indeed checking that "then" and "else" branches of an if @@ -289,7 +289,7 @@ AluInstCount += OccupiedDwords(*I); } unsigned Opcode = PushBeforeModifier ? - AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; + R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU; BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) // We don't use the ADDR field until R600ControlFlowFinalizer pass, where // it is safe to assume it is 0. However if we always put 0 here, the ifcvt @@ -322,7 +322,7 @@ BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; MachineBasicBlock::iterator I = MBB.begin(); - if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU) + if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) continue; // BB was already parsed for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { if (isALU(*I)) { Index: lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp =================================================================== --- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -96,16 +96,16 @@ // Expand LDS_*_RET instructions if (TII->isLDSRetInstr(MI.getOpcode())) { - int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); assert(DstIdx != -1); MachineOperand &DstOp = MI.getOperand(DstIdx); MachineInstr *Mov = TII->buildMovInstr(&MBB, I, - DstOp.getReg(), AMDGPU::OQAP); - DstOp.setReg(AMDGPU::OQAP); + DstOp.getReg(), R600::OQAP); + DstOp.setReg(R600::OQAP); int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(), - AMDGPU::OpName::pred_sel); + R600::OpName::pred_sel); int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(), - AMDGPU::OpName::pred_sel); + R600::OpName::pred_sel); // Copy the pred_sel bit Mov->getOperand(MovPredSelIdx).setReg( MI.getOperand(LDSPredSelIdx).getReg()); @@ -114,7 +114,7 @@ switch (MI.getOpcode()) { default: break; // Expand PRED_X to one of the PRED_SET instructions. - case AMDGPU::PRED_X: { + case R600::PRED_X: { uint64_t Flags = MI.getOperand(3).getImm(); // The native opcode used by PRED_X is stored as an immediate in the // third operand. 
@@ -122,17 +122,18 @@ MI.getOperand(2).getImm(), // opcode MI.getOperand(0).getReg(), // dst MI.getOperand(1).getReg(), // src0 - AMDGPU::ZERO); // src1 + R600::ZERO); // src1 TII->addFlag(*PredSet, 0, MO_FLAG_MASK); if (Flags & MO_FLAG_PUSH) { - TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1); + TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1); } else { - TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1); + TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1); } MI.eraseFromParent(); continue; } - case AMDGPU::DOT_4: { + case R600::DOT_4: { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); unsigned DstReg = MI.getOperand(0).getReg(); @@ -141,7 +142,7 @@ for (unsigned Chan = 0; Chan < 4; ++Chan) { bool Mask = (Chan != TRI.getHWRegChan(DstReg)); unsigned SubDstReg = - AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); MachineInstr *BMI = TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); if (Chan > 0) { @@ -156,10 +157,10 @@ // While not strictly necessary from hw point of view, we force // all src operands of a dot4 inst to belong to the same slot. unsigned Src0 = BMI->getOperand( - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) + TII->getOperandIdx(Opcode, R600::OpName::src0)) .getReg(); unsigned Src1 = BMI->getOperand( - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) + TII->getOperandIdx(Opcode, R600::OpName::src1)) .getReg(); (void) Src0; (void) Src1; @@ -206,14 +207,14 @@ // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { unsigned DstReg = MI.getOperand( - TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); + TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); unsigned Src0 = MI.getOperand( - TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); + TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); unsigned Src1 = 0; // Determine the correct source registers if (!IsCube) { - int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); + int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1); if (Src1Idx != -1) { Src1 = MI.getOperand(Src1Idx).getReg(); } @@ -241,7 +242,7 @@ // the current Channel. 
Mask = (Chan != TRI.getHWRegChan(DstReg)); unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; - DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); } // Set the IsLast bit @@ -250,11 +251,11 @@ // Add the new instruction unsigned Opcode = MI.getOpcode(); switch (Opcode) { - case AMDGPU::CUBE_r600_pseudo: - Opcode = AMDGPU::CUBE_r600_real; + case R600::CUBE_r600_pseudo: + Opcode = R600::CUBE_r600_real; break; - case AMDGPU::CUBE_eg_pseudo: - Opcode = AMDGPU::CUBE_eg_real; + case R600::CUBE_eg_pseudo: + Opcode = R600::CUBE_eg_real; break; default: break; @@ -271,12 +272,12 @@ if (NotLast) { TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST); } - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg); + SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp); + SetFlagInNewMI(NewMI, &MI, R600::OpName::literal); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg); } MI.eraseFromParent(); } Index: lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.h +++ lib/Target/AMDGPU/R600ISelLowering.h @@ -23,6 +23,8 @@ class R600Subtarget; class R600TargetLowering final : public AMDGPUTargetLowering { + + const R600Subtarget *Subtarget; public: R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI); @@ -36,6 +38,7 @@ void ReplaceNodeResults(SDNode * N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -14,7 +14,6 @@ #include "R600ISelLowering.h" #include "AMDGPUFrameLowering.h" -#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" @@ -51,17 +50,31 @@ using namespace llvm; +static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + MachineFunction &MF = State.getMachineFunction(); + AMDGPUMachineFunction *MFI = MF.getInfo(); + + uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(), + ArgFlags.getOrigAlign()); + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + +#include "R600GenCallingConv.inc" + R600TargetLowering::R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI) - : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) { - addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); - addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); - addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); - addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); - addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); - addRegisterClass(MVT::v4i32, 
&AMDGPU::R600_Reg128RegClass); + : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { + addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); + addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); + addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); + addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); + addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); + addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); - computeRegisterProperties(STI.getRegisterInfo()); + computeRegisterProperties(Subtarget->getRegisterInfo()); // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -148,6 +161,11 @@ setOperationAction(ISD::FSUB, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); + setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -216,6 +234,34 @@ setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); } + + // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we + // need it for R600. + if (!Subtarget->hasFP32Denormals()) + setOperationAction(ISD::FMAD, MVT::f32, Legal); + + if (!Subtarget->hasBFI()) { + // fcopysign can be done in a single instruction with BFI. + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + } + + if (!Subtarget->hasBCNT(32)) + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + + if (!Subtarget->hasBCNT(64)) + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + if (Subtarget->hasFFBH()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + + if (Subtarget->hasFFBL()) + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); + + // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we + // need it for R600. + if (Subtarget->hasBFE()) + setHasExtractBitsInsn(true); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -246,14 +292,10 @@ setTargetDAGCombine(ISD::LOAD); } -const R600Subtarget *R600TargetLowering::getSubtarget() const { - return static_cast(Subtarget); -} - static inline bool isEOP(MachineBasicBlock::iterator I) { if (std::next(I) == I->getParent()->end()) return false; - return std::next(I)->getOpcode() == AMDGPU::RETURN; + return std::next(I)->getOpcode() == R600::RETURN; } MachineBasicBlock * @@ -262,24 +304,24 @@ MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock::iterator I = MI; - const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); + const R600InstrInfo *TII = Subtarget->getInstrInfo(); switch (MI.getOpcode()) { default: // Replace LDS_*_RET instruction that don't have any uses with the // equivalent LDS_*_NORET instruction. if (TII->isLDSRetInstr(MI.getOpcode())) { - int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); assert(DstIdx != -1); MachineInstrBuilder NewMI; // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add // LDS_1A2D support and remove this special case. 
if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || - MI.getOpcode() == AMDGPU::LDS_CMPST_RET) + MI.getOpcode() == R600::LDS_CMPST_RET) return BB; NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode()))); + TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { NewMI.add(MI.getOperand(i)); } @@ -288,23 +330,23 @@ } break; - case AMDGPU::FABS_R600: { + case R600::FABS_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), + *BB, I, R600::MOV, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); TII->addFlag(*NewMI, 0, MO_FLAG_ABS); break; } - case AMDGPU::FNEG_R600: { + case R600::FNEG_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), + *BB, I, R600::MOV, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); TII->addFlag(*NewMI, 0, MO_FLAG_NEG); break; } - case AMDGPU::MASK_WRITE: { + case R600::MASK_WRITE: { unsigned maskedRegister = MI.getOperand(0).getReg(); assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); @@ -312,7 +354,7 @@ break; } - case AMDGPU::MOV_IMM_F32: + case R600::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1) .getFPImm() ->getValueAPF() @@ -320,39 +362,39 @@ .getZExtValue()); break; - case AMDGPU::MOV_IMM_I32: + case R600::MOV_IMM_I32: TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); break; - case AMDGPU::MOV_IMM_GLOBAL_ADDR: { + case R600::MOV_IMM_GLOBAL_ADDR: { //TODO: Perhaps combine this instruction with the next if possible auto MIB = TII->buildDefaultInstruction( - *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X); - int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal); + *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); + int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal); //TODO: Ugh this is rather ugly MIB->getOperand(Idx) = MI.getOperand(1); break; } - case AMDGPU::CONST_COPY: { + case R600::CONST_COPY: { MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); - TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel, + *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); + TII->setImmOperand(*NewMI, R600::OpName::src0_sel, MI.getOperand(1).getImm()); break; } - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_64_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: + case R600::RAT_WRITE_CACHELESS_32_eg: + case R600::RAT_WRITE_CACHELESS_64_eg: + case R600::RAT_WRITE_CACHELESS_128_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) .addImm(isEOP(I)); // Set End of program bit break; - case AMDGPU::RAT_STORE_TYPED_eg: + case R600::RAT_STORE_TYPED_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) @@ -360,49 +402,49 @@ .addImm(isEOP(I)); // Set End of program bit break; - case AMDGPU::BRANCH: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + case R600::BRANCH: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP)) .add(MI.getOperand(0)); break; - case AMDGPU::BRANCH_COND_f32: { + case R600::BRANCH_COND_f32: { MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), - AMDGPU::PREDICATE_BIT) + BuildMI(*BB, I, 
BB->findDebugLoc(I), TII->get(R600::PRED_X), + R600::PREDICATE_BIT) .add(MI.getOperand(1)) - .addImm(AMDGPU::PRED_SETNE) + .addImm(R600::PRED_SETNE) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) .add(MI.getOperand(0)) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + .addReg(R600::PREDICATE_BIT, RegState::Kill); break; } - case AMDGPU::BRANCH_COND_i32: { + case R600::BRANCH_COND_i32: { MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), - AMDGPU::PREDICATE_BIT) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), + R600::PREDICATE_BIT) .add(MI.getOperand(1)) - .addImm(AMDGPU::PRED_SETNE_INT) + .addImm(R600::PRED_SETNE_INT) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) .add(MI.getOperand(0)) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + .addReg(R600::PREDICATE_BIT, RegState::Kill); break; } - case AMDGPU::EG_ExportSwz: - case AMDGPU::R600_ExportSwz: { + case R600::EG_ExportSwz: + case R600::R600_ExportSwz: { // Instruction is left unmodified if its not the last one of its type bool isLastInstructionOfItsType = true; unsigned InstExportType = MI.getOperand(1).getImm(); for (MachineBasicBlock::iterator NextExportInst = std::next(I), EndBlock = BB->end(); NextExportInst != EndBlock; NextExportInst = std::next(NextExportInst)) { - if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || - NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { + if (NextExportInst->getOpcode() == R600::EG_ExportSwz || + NextExportInst->getOpcode() == R600::R600_ExportSwz) { unsigned CurrentInstExportType = NextExportInst->getOperand(1) .getImm(); if (CurrentInstExportType == InstExportType) { @@ -414,7 +456,7 @@ bool EOP = isEOP(I); if (!EOP && !isLastInstructionOfItsType) return BB; - unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40; + unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 
84 : 40; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) @@ -427,7 +469,7 @@ .addImm(EOP); break; } - case AMDGPU::RETURN: { + case R600::RETURN: { return BB; } } @@ -583,23 +625,23 @@ return LowerImplicitParameter(DAG, VT, DL, 8); case Intrinsic::r600_read_tgid_x: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_X, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T1_X, VT); case Intrinsic::r600_read_tgid_y: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Y, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T1_Y, VT); case Intrinsic::r600_read_tgid_z: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Z, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T1_Z, VT); case Intrinsic::r600_read_tidig_x: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_X, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T0_X, VT); case Intrinsic::r600_read_tidig_y: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Y, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T0_Y, VT); case Intrinsic::r600_read_tidig_z: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Z, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T0_Z, VT); case Intrinsic::r600_recipsqrt_ieee: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); @@ -1521,7 +1563,7 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); + const R600FrameLowering *TFL = Subtarget->getFrameLowering(); FrameIndexSDNode *FIN = cast(Op); @@ -1533,6 +1575,28 @@ Op.getValueType()); } +CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, + bool IsVarArg) const { + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::Cold: + return CC_R600_Kernel; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: + case CallingConv::AMDGPU_LS: + return CC_R600; + default: + report_fatal_error("Unsupported calling convention."); + } +} + /// XXX Only kernel functions are supported, so we can assume for now that /// every function is a kernel function, but in the future we should use /// separate calling conventions for kernel and non-kernel functions. 
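// Illustrative sketch (not from this patch): how the CCAssignFn chosen by the
// new R600TargetLowering::CCAssignFnForCall() is normally consumed. CC_R600 and
// CC_R600_Kernel are emitted into R600GenCallingConv.inc (included above); the
// helper name and its parameter list here are hypothetical.
static void analyzeR600FormalArgs(const R600TargetLowering &TLI,
                                  CallingConv::ID CallConv, bool IsVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  MachineFunction &MF, LLVMContext &Ctx,
                                  SmallVectorImpl<CCValAssign> &ArgLocs) {
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  // For kernel calling conventions this runs the custom allocateKernArg()
  // handler, so each CCValAssign carries a kernarg byte offset rather than a
  // register assignment.
  CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall(CallConv, IsVarArg));
}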
@@ -1565,7 +1629,7 @@ } if (AMDGPU::isShader(CallConv)) { - unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass); SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); InVals.push_back(Register); continue; @@ -1596,7 +1660,7 @@ unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) + + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); @@ -1984,26 +2048,26 @@ SDValue &Src, SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) const { - const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); + const R600InstrInfo *TII = Subtarget->getInstrInfo(); if (!Src.isMachineOpcode()) return false; switch (Src.getMachineOpcode()) { - case AMDGPU::FNEG_R600: + case R600::FNEG_R600: if (!Neg.getNode()) return false; Src = Src.getOperand(0); Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); return true; - case AMDGPU::FABS_R600: + case R600::FABS_R600: if (!Abs.getNode()) return false; Src = Src.getOperand(0); Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); return true; - case AMDGPU::CONST_COPY: { + case R600::CONST_COPY: { unsigned Opcode = ParentNode->getMachineOpcode(); - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; if (!Sel.getNode()) return false; @@ -2014,17 +2078,17 @@ // Gather constants values int SrcIndices[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + TII->getOperandIdx(Opcode, R600::OpName::src0), + TII->getOperandIdx(Opcode, R600::OpName::src1), + TII->getOperandIdx(Opcode, R600::OpName::src2), + TII->getOperandIdx(Opcode, R600::OpName::src0_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_Z), + TII->getOperandIdx(Opcode, R600::OpName::src0_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_W) }; std::vector Consts; for (int OtherSrcIdx : SrcIndices) { @@ -2037,7 +2101,7 @@ } if (RegisterSDNode *Reg = dyn_cast(ParentNode->getOperand(OtherSrcIdx))) { - if (Reg->getReg() == AMDGPU::ALU_CONST) { + if (Reg->getReg() == R600::ALU_CONST) { ConstantSDNode *Cst = cast(ParentNode->getOperand(OtherSelIdx)); Consts.push_back(Cst->getZExtValue()); @@ -2052,30 +2116,30 @@ } Sel = CstOffset; - Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); + Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); return true; } - case AMDGPU::MOV_IMM_GLOBAL_ADDR: + case R600::MOV_IMM_GLOBAL_ADDR: // Check if the Imm slot is used. Taken from below. 
if (cast(Imm)->getZExtValue()) return false; Imm = Src.getOperand(0); - Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32); + Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); return true; - case AMDGPU::MOV_IMM_I32: - case AMDGPU::MOV_IMM_F32: { - unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + case R600::MOV_IMM_I32: + case R600::MOV_IMM_F32: { + unsigned ImmReg = R600::ALU_LITERAL_X; uint64_t ImmValue = 0; - if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) { + if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { ConstantFPSDNode *FPC = dyn_cast(Src.getOperand(0)); float FloatValue = FPC->getValueAPF().convertToFloat(); if (FloatValue == 0.0) { - ImmReg = AMDGPU::ZERO; + ImmReg = R600::ZERO; } else if (FloatValue == 0.5) { - ImmReg = AMDGPU::HALF; + ImmReg = R600::HALF; } else if (FloatValue == 1.0) { - ImmReg = AMDGPU::ONE; + ImmReg = R600::ONE; } else { ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); } @@ -2083,9 +2147,9 @@ ConstantSDNode *C = dyn_cast(Src.getOperand(0)); uint64_t Value = C->getZExtValue(); if (Value == 0) { - ImmReg = AMDGPU::ZERO; + ImmReg = R600::ZERO; } else if (Value == 1) { - ImmReg = AMDGPU::ONE_INT; + ImmReg = R600::ONE_INT; } else { ImmValue = Value; } @@ -2094,7 +2158,7 @@ // Check that we aren't already using an immediate. // XXX: It's possible for an instruction to have more than one // immediate operand, but this is not supported yet. - if (ImmReg == AMDGPU::ALU_LITERAL_X) { + if (ImmReg == R600::ALU_LITERAL_X) { if (!Imm.getNode()) return false; ConstantSDNode *C = dyn_cast(Imm); @@ -2114,7 +2178,7 @@ /// Fold the instructions after selecting them SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { - const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); + const R600InstrInfo *TII = Subtarget->getInstrInfo(); if (!Node->isMachineOpcode()) return Node; @@ -2123,36 +2187,36 @@ std::vector Ops(Node->op_begin(), Node->op_end()); - if (Opcode == AMDGPU::DOT_4) { + if (Opcode == R600::DOT_4) { int OperandIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + TII->getOperandIdx(Opcode, R600::OpName::src0_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_Z), + TII->getOperandIdx(Opcode, R600::OpName::src0_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_W) }; int NegIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) + TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z), + 
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W) }; int AbsIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W) }; for (unsigned i = 0; i < 8; i++) { if (OperandIdx[i] < 0) @@ -2160,7 +2224,7 @@ SDValue &Src = Ops[OperandIdx[i] - 1]; SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue &Abs = Ops[AbsIdx[i] - 1]; - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); if (HasDst) SelIdx--; @@ -2168,7 +2232,7 @@ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } - } else if (Opcode == AMDGPU::REG_SEQUENCE) { + } else if (Opcode == R600::REG_SEQUENCE) { for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { SDValue &Src = Ops[i]; if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) @@ -2178,18 +2242,18 @@ if (!TII->hasInstrModifiers(Opcode)) return Node; int OperandIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) + TII->getOperandIdx(Opcode, R600::OpName::src0), + TII->getOperandIdx(Opcode, R600::OpName::src1), + TII->getOperandIdx(Opcode, R600::OpName::src2) }; int NegIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) + TII->getOperandIdx(Opcode, R600::OpName::src0_neg), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg), + TII->getOperandIdx(Opcode, R600::OpName::src2_neg) }; int AbsIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs), -1 }; for (unsigned i = 0; i < 3; i++) { @@ -2199,9 +2263,9 @@ SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue FakeAbs; SDValue &Abs = (AbsIdx[i] > -1) ? 
Ops[AbsIdx[i] - 1] : FakeAbs; - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); - int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal); + int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal); if (HasDst) { SelIdx--; ImmIdx--; Index: lib/Target/AMDGPU/R600InstrFormats.td =================================================================== --- lib/Target/AMDGPU/R600InstrFormats.td +++ lib/Target/AMDGPU/R600InstrFormats.td @@ -41,7 +41,7 @@ bit LDS_1A2D = 0; let SubtargetPredicate = isR600toCayman; - let Namespace = "AMDGPU"; + let Namespace = "R600"; let OutOperandList = outs; let InOperandList = ins; let AsmString = asm; Index: lib/Target/AMDGPU/R600InstrInfo.h =================================================================== --- lib/Target/AMDGPU/R600InstrInfo.h +++ lib/Target/AMDGPU/R600InstrInfo.h @@ -15,8 +15,11 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H -#include "AMDGPUInstrInfo.h" #include "R600RegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "R600GenInstrInfo.inc" namespace llvm { @@ -34,7 +37,7 @@ class MachineInstrBuilder; class R600Subtarget; -class R600InstrInfo final : public AMDGPUInstrInfo { +class R600InstrInfo final : public R600GenInstrInfo { private: const R600RegisterInfo RI; const R600Subtarget &ST; @@ -324,7 +327,7 @@ PseudoSourceValue::PSVKind Kind) const override; }; -namespace AMDGPU { +namespace R600 { int getLDSNoRetOp(uint16_t Opcode); Index: lib/Target/AMDGPU/R600InstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/R600InstrInfo.cpp +++ lib/Target/AMDGPU/R600InstrInfo.cpp @@ -45,10 +45,15 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#include "AMDGPUGenDFAPacketizer.inc" +#include "R600GenDFAPacketizer.inc" + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#define GET_INSTRINFO_NAMED_OPS +#include "R600GenInstrInfo.inc" R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) - : AMDGPUInstrInfo(ST), RI(), ST(ST) {} + : R600GenInstrInfo(-1, -1), RI(), ST(ST) {} bool R600InstrInfo::isVector(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; @@ -59,31 +64,31 @@ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned VectorComponents = 0; - if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || - AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && - (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || - AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { + if ((R600::R600_Reg128RegClass.contains(DestReg) || + R600::R600_Reg128VerticalRegClass.contains(DestReg)) && + (R600::R600_Reg128RegClass.contains(SrcReg) || + R600::R600_Reg128VerticalRegClass.contains(SrcReg))) { VectorComponents = 4; - } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || - AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && - (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || - AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { + } else if((R600::R600_Reg64RegClass.contains(DestReg) || + R600::R600_Reg64VerticalRegClass.contains(DestReg)) && + (R600::R600_Reg64RegClass.contains(SrcReg) || + R600::R600_Reg64VerticalRegClass.contains(SrcReg))) { VectorComponents = 2; } if (VectorComponents > 0) { for (unsigned I = 0; I < VectorComponents; I++) { unsigned SubRegIndex = 
AMDGPURegisterInfo::getSubRegFromChannel(I); - buildDefaultInstruction(MBB, MI, AMDGPU::MOV, + buildDefaultInstruction(MBB, MI, R600::MOV, RI.getSubReg(DestReg, SubRegIndex), RI.getSubReg(SrcReg, SubRegIndex)) .addReg(DestReg, RegState::Define | RegState::Implicit); } } else { - MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, + MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, R600::MOV, DestReg, SrcReg); - NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) + NewMI->getOperand(getOperandIdx(*NewMI, R600::OpName::src0)) .setIsKill(KillSrc); } } @@ -104,9 +109,9 @@ switch(Opcode) { default: return false; - case AMDGPU::MOV: - case AMDGPU::MOV_IMM_F32: - case AMDGPU::MOV_IMM_I32: + case R600::MOV: + case R600::MOV_IMM_F32: + case R600::MOV_IMM_I32: return true; } } @@ -118,10 +123,10 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const { switch(Opcode) { default: return false; - case AMDGPU::CUBE_r600_pseudo: - case AMDGPU::CUBE_r600_real: - case AMDGPU::CUBE_eg_pseudo: - case AMDGPU::CUBE_eg_real: + case R600::CUBE_r600_pseudo: + case R600::CUBE_r600_real: + case R600::CUBE_eg_pseudo: + case R600::CUBE_eg_real: return true; } } @@ -149,7 +154,7 @@ } bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { - return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; + return isLDSInstr(Opcode) && getOperandIdx(Opcode, R600::OpName::dst) != -1; } bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const { @@ -158,12 +163,12 @@ if (isVector(MI) || isCubeOp(MI.getOpcode())) return true; switch (MI.getOpcode()) { - case AMDGPU::PRED_X: - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::COPY: - case AMDGPU::DOT_4: + case R600::PRED_X: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::COPY: + case R600::DOT_4: return true; default: return false; @@ -173,7 +178,7 @@ bool R600InstrInfo::isTransOnly(unsigned Opcode) const { if (ST.hasCaymanISA()) return false; - return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); + return (get(Opcode).getSchedClass() == R600::Sched::TransALU); } bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { @@ -181,7 +186,7 @@ } bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { - return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); + return (get(Opcode).getSchedClass() == R600::Sched::VecALU); } bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { @@ -215,8 +220,8 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { switch (Opcode) { - case AMDGPU::KILLGT: - case AMDGPU::GROUP_BARRIER: + case R600::KILLGT: + case R600::GROUP_BARRIER: return true; default: return false; @@ -224,11 +229,11 @@ } bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { - return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; + return MI.findRegisterUseOperandIdx(R600::AR_X) != -1; } bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { - return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; + return MI.findRegisterDefOperandIdx(R600::AR_X) != -1; } bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { @@ -242,7 +247,7 @@ TargetRegisterInfo::isVirtualRegister(I->getReg())) continue; - if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) + if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg())) return true; } return false; @@ -250,17 +255,17 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) 
const { static const unsigned SrcSelTable[][2] = { - {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, - {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, - {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, - {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, - {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, - {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, - {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, - {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, - {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, - {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, - {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} + {R600::OpName::src0, R600::OpName::src0_sel}, + {R600::OpName::src1, R600::OpName::src1_sel}, + {R600::OpName::src2, R600::OpName::src2_sel}, + {R600::OpName::src0_X, R600::OpName::src0_sel_X}, + {R600::OpName::src0_Y, R600::OpName::src0_sel_Y}, + {R600::OpName::src0_Z, R600::OpName::src0_sel_Z}, + {R600::OpName::src0_W, R600::OpName::src0_sel_W}, + {R600::OpName::src1_X, R600::OpName::src1_sel_X}, + {R600::OpName::src1_Y, R600::OpName::src1_sel_Y}, + {R600::OpName::src1_Z, R600::OpName::src1_sel_Z}, + {R600::OpName::src1_W, R600::OpName::src1_sel_W} }; for (const auto &Row : SrcSelTable) { @@ -275,23 +280,23 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const { SmallVector, 3> Result; - if (MI.getOpcode() == AMDGPU::DOT_4) { + if (MI.getOpcode() == R600::DOT_4) { static const unsigned OpTable[8][2] = { - {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, - {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, - {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, - {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, - {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, - {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, - {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, - {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, + {R600::OpName::src0_X, R600::OpName::src0_sel_X}, + {R600::OpName::src0_Y, R600::OpName::src0_sel_Y}, + {R600::OpName::src0_Z, R600::OpName::src0_sel_Z}, + {R600::OpName::src0_W, R600::OpName::src0_sel_W}, + {R600::OpName::src1_X, R600::OpName::src1_sel_X}, + {R600::OpName::src1_Y, R600::OpName::src1_sel_Y}, + {R600::OpName::src1_Z, R600::OpName::src1_sel_Z}, + {R600::OpName::src1_W, R600::OpName::src1_sel_W}, }; for (unsigned j = 0; j < 8; j++) { MachineOperand &MO = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); unsigned Reg = MO.getReg(); - if (Reg == AMDGPU::ALU_CONST) { + if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); Result.push_back(std::make_pair(&MO, Sel.getImm())); @@ -303,9 +308,9 @@ } static const unsigned OpTable[3][2] = { - {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, - {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, - {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, + {R600::OpName::src0, R600::OpName::src0_sel}, + {R600::OpName::src1, R600::OpName::src1_sel}, + {R600::OpName::src2, R600::OpName::src2_sel}, }; for (unsigned j = 0; j < 3; j++) { @@ -314,15 +319,15 @@ break; MachineOperand &MO = MI.getOperand(SrcIdx); unsigned Reg = MO.getReg(); - if (Reg == AMDGPU::ALU_CONST) { + if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); Result.push_back(std::make_pair(&MO, Sel.getImm())); continue; } - if (Reg == AMDGPU::ALU_LITERAL_X) { + if (Reg == R600::ALU_LITERAL_X) { MachineOperand &Operand = - MI.getOperand(getOperandIdx(MI.getOpcode(), 
AMDGPU::OpName::literal)); + MI.getOperand(getOperandIdx(MI.getOpcode(), R600::OpName::literal)); if (Operand.isImm()) { Result.push_back(std::make_pair(&MO, Operand.getImm())); continue; @@ -346,7 +351,7 @@ ++i; unsigned Reg = Src.first->getReg(); int Index = RI.getEncodingValue(Reg) & 0xff; - if (Reg == AMDGPU::OQAP) { + if (Reg == R600::OQAP) { Result.push_back(std::make_pair(Index, 0U)); } if (PV.find(Reg) != PV.end()) { @@ -436,7 +441,7 @@ const std::pair &Src = Srcs[j]; if (Src.first < 0 || Src.first == 255) continue; - if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { + if (Src.first == GET_REG_INDEX(RI.getEncodingValue(R600::OQAP))) { if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { // The value from output queue A (denoted by register OQAP) can @@ -542,7 +547,7 @@ for (unsigned i = 0, e = IG.size(); i < e; ++i) { IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), - AMDGPU::OpName::bank_swizzle); + R600::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) IG[i]->getOperand(Op).getImm()); } @@ -611,14 +616,14 @@ continue; for (const auto &Src : getSrcs(MI)) { - if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) + if (Src.first->getReg() == R600::ALU_LITERAL_X) Literals.insert(Src.second); if (Literals.size() > 4) return false; - if (Src.first->getReg() == AMDGPU::ALU_CONST) + if (Src.first->getReg() == R600::ALU_CONST) Consts.push_back(Src.second); - if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || - AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { + if (R600::R600_KC0RegClass.contains(Src.first->getReg()) || + R600::R600_KC1RegClass.contains(Src.first->getReg())) { unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; unsigned Chan = RI.getHWRegChan(Src.first->getReg()); Consts.push_back((Index << 2) | Chan); @@ -637,7 +642,7 @@ static bool isPredicateSetter(unsigned Opcode) { switch (Opcode) { - case AMDGPU::PRED_X: + case R600::PRED_X: return true; default: return false; @@ -659,12 +664,12 @@ static bool isJump(unsigned Opcode) { - return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; + return Opcode == R600::JUMP || Opcode == R600::JUMP_COND; } static bool isBranch(unsigned Opcode) { - return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || - Opcode == AMDGPU::BRANCH_COND_f32; + return Opcode == R600::BRANCH || Opcode == R600::BRANCH_COND_i32 || + Opcode == R600::BRANCH_COND_f32; } bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB, @@ -679,7 +684,7 @@ if (I == MBB.end()) return false; - // AMDGPU::BRANCH* instructions are only available after isel and are not + // R600::BRANCH* instructions are only available after isel and are not // handled if (isBranch(I->getOpcode())) return true; @@ -688,7 +693,7 @@ } // Remove successive JUMP - while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) { + while (I != MBB.begin() && std::prev(I)->getOpcode() == R600::JUMP) { MachineBasicBlock::iterator PriorI = std::prev(I); if (AllowModify) I->removeFromParent(); @@ -699,10 +704,10 @@ // If there is only one terminator instruction, process it. 
unsigned LastOpc = LastInst.getOpcode(); if (I == MBB.begin() || !isJump((--I)->getOpcode())) { - if (LastOpc == AMDGPU::JUMP) { + if (LastOpc == R600::JUMP) { TBB = LastInst.getOperand(0).getMBB(); return false; - } else if (LastOpc == AMDGPU::JUMP_COND) { + } else if (LastOpc == R600::JUMP_COND) { auto predSet = I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; @@ -710,7 +715,7 @@ TBB = LastInst.getOperand(0).getMBB(); Cond.push_back(predSet->getOperand(1)); Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false)); return false; } return true; // Can't handle indirect branch. @@ -721,7 +726,7 @@ unsigned SecondLastOpc = SecondLastInst.getOpcode(); // If the block ends with a B and a Bcc, handle it. - if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { + if (SecondLastOpc == R600::JUMP_COND && LastOpc == R600::JUMP) { auto predSet = --I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; @@ -730,7 +735,7 @@ FBB = LastInst.getOperand(0).getMBB(); Cond.push_back(predSet->getOperand(1)); Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false)); return false; } @@ -742,8 +747,8 @@ MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); It != E; ++It) { - if (It->getOpcode() == AMDGPU::CF_ALU || - It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + if (It->getOpcode() == R600::CF_ALU || + It->getOpcode() == R600::CF_ALU_PUSH_BEFORE) return It.getReverse(); } return MBB.end(); @@ -760,7 +765,7 @@ if (!FBB) { if (Cond.empty()) { - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); + BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(TBB); return 1; } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); @@ -768,14 +773,14 @@ addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) + BuildMI(&MBB, DL, get(R600::JUMP_COND)) .addMBB(TBB) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + .addReg(R600::PREDICATE_BIT, RegState::Kill); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) return 1; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); - CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); + assert (CfAlu->getOpcode() == R600::CF_ALU); + CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE)); return 1; } } else { @@ -783,15 +788,15 @@ assert(PredSet && "No previous predicate !"); addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) + BuildMI(&MBB, DL, get(R600::JUMP_COND)) .addMBB(TBB) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); + .addReg(R600::PREDICATE_BIT, RegState::Kill); + BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(FBB); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) return 2; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); - CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); + assert (CfAlu->getOpcode() == R600::CF_ALU); + CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE)); return 2; } } @@ -812,18 +817,18 @@ switch (I->getOpcode()) { default: return 0; - case AMDGPU::JUMP_COND: { + case R600::JUMP_COND: { MachineInstr *predSet = 
findFirstPredicateSetterFrom(MBB, I); clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) break; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); - CfAlu->setDesc(get(AMDGPU::CF_ALU)); + assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(R600::CF_ALU)); break; } - case AMDGPU::JUMP: + case R600::JUMP: I->eraseFromParent(); break; } @@ -837,18 +842,18 @@ // FIXME: only one case?? default: return 1; - case AMDGPU::JUMP_COND: { + case R600::JUMP_COND: { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) break; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); - CfAlu->setDesc(get(AMDGPU::CF_ALU)); + assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(R600::CF_ALU)); break; } - case AMDGPU::JUMP: + case R600::JUMP: I->eraseFromParent(); break; } @@ -863,9 +868,9 @@ unsigned Reg = MI.getOperand(idx).getReg(); switch (Reg) { default: return false; - case AMDGPU::PRED_SEL_ONE: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PREDICATE_BIT: + case R600::PRED_SEL_ONE: + case R600::PRED_SEL_ZERO: + case R600::PREDICATE_BIT: return true; } } @@ -876,9 +881,9 @@ // be predicated. Until we have proper support for instruction clauses in the // backend, we will mark KILL* instructions as unpredicable. - if (MI.getOpcode() == AMDGPU::KILLGT) { + if (MI.getOpcode() == R600::KILLGT) { return false; - } else if (MI.getOpcode() == AMDGPU::CF_ALU) { + } else if (MI.getOpcode() == R600::CF_ALU) { // If the clause start in the middle of MBB then the MBB has more // than a single clause, unable to predicate several clauses. 
if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI)) @@ -888,7 +893,7 @@ } else if (isVector(MI)) { return false; } else { - return AMDGPUInstrInfo::isPredicable(MI); + return TargetInstrInfo::isPredicable(MI); } } @@ -929,17 +934,17 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl &Cond) const { MachineOperand &MO = Cond[1]; switch (MO.getImm()) { - case AMDGPU::PRED_SETE_INT: - MO.setImm(AMDGPU::PRED_SETNE_INT); + case R600::PRED_SETE_INT: + MO.setImm(R600::PRED_SETNE_INT); break; - case AMDGPU::PRED_SETNE_INT: - MO.setImm(AMDGPU::PRED_SETE_INT); + case R600::PRED_SETNE_INT: + MO.setImm(R600::PRED_SETE_INT); break; - case AMDGPU::PRED_SETE: - MO.setImm(AMDGPU::PRED_SETNE); + case R600::PRED_SETE: + MO.setImm(R600::PRED_SETNE); break; - case AMDGPU::PRED_SETNE: - MO.setImm(AMDGPU::PRED_SETE); + case R600::PRED_SETNE: + MO.setImm(R600::PRED_SETE); break; default: return true; @@ -947,11 +952,11 @@ MachineOperand &MO2 = Cond[2]; switch (MO2.getReg()) { - case AMDGPU::PRED_SEL_ZERO: - MO2.setReg(AMDGPU::PRED_SEL_ONE); + case R600::PRED_SEL_ZERO: + MO2.setReg(R600::PRED_SEL_ONE); break; - case AMDGPU::PRED_SEL_ONE: - MO2.setReg(AMDGPU::PRED_SEL_ZERO); + case R600::PRED_SEL_ONE: + MO2.setReg(R600::PRED_SEL_ZERO); break; default: return true; @@ -968,22 +973,22 @@ ArrayRef Pred) const { int PIdx = MI.findFirstPredOperandIdx(); - if (MI.getOpcode() == AMDGPU::CF_ALU) { + if (MI.getOpcode() == R600::CF_ALU) { MI.getOperand(8).setImm(0); return true; } - if (MI.getOpcode() == AMDGPU::DOT_4) { - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X)) + if (MI.getOpcode() == R600::DOT_4) { + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_X)) .setReg(Pred[2].getReg()); - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y)) + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Y)) .setReg(Pred[2].getReg()); - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z)) + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Z)) .setReg(Pred[2].getReg()); - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W)) + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W)) .setReg(Pred[2].getReg()); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); - MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit); return true; } @@ -991,7 +996,7 @@ MachineOperand &PMO = MI.getOperand(PIdx); PMO.setReg(Pred[2].getReg()); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); - MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit); return true; } @@ -1021,20 +1026,20 @@ default: { MachineBasicBlock *MBB = MI.getParent(); int OffsetOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::addr); // addr is a custom operand with multiple MI operands, and only the // first MI operand is given a name. 
int RegOpIdx = OffsetOpIdx + 1; int ChanOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::chan); if (isRegisterLoad(MI)) { int DstOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::dst); unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); - if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); } else { @@ -1043,12 +1048,12 @@ } } else if (isRegisterStore(MI)) { int ValOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::val); unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); - if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), MI.getOperand(ValOpIdx).getReg()); } else { @@ -1063,15 +1068,15 @@ MBB->erase(MI); return true; } - case AMDGPU::R600_EXTRACT_ELT_V2: - case AMDGPU::R600_EXTRACT_ELT_V4: + case R600::R600_EXTRACT_ELT_V2: + case R600::R600_EXTRACT_ELT_V4: buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(), RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address MI.getOperand(2).getReg(), RI.getHWRegChan(MI.getOperand(1).getReg())); break; - case AMDGPU::R600_INSERT_ELT_V2: - case AMDGPU::R600_INSERT_ELT_V4: + case R600::R600_INSERT_ELT_V2: + case R600::R600_INSERT_ELT_V4: buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address MI.getOperand(3).getReg(), // Offset @@ -1096,14 +1101,14 @@ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { for (unsigned Chan = 0; Chan < StackWidth; ++Chan) { - unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); + unsigned Reg = R600::R600_TReg32RegClass.getRegister((4 * Index) + Chan); TRI.reserveRegisterTuples(Reserved, Reg); } } } const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { - return &AMDGPU::R600_TReg32_XRegClass; + return &R600::R600_TReg32_XRegClass; } MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, @@ -1121,20 +1126,20 @@ unsigned AddrReg; switch (AddrChan) { default: llvm_unreachable("Invalid Channel"); - case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; - case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; - case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; - case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break; } - MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, - AMDGPU::AR_X, OffsetReg); - 
setImmOperand(*MOVA, AMDGPU::OpName::write, 0); + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg, + R600::AR_X, OffsetReg); + setImmOperand(*MOVA, R600::OpName::write, 0); - MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV, AddrReg, ValueReg) - .addReg(AMDGPU::AR_X, + .addReg(R600::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1); + setImmOperand(*Mov, R600::OpName::dst_rel, 1); return Mov; } @@ -1153,21 +1158,21 @@ unsigned AddrReg; switch (AddrChan) { default: llvm_unreachable("Invalid Channel"); - case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; - case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; - case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; - case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break; } - MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, - AMDGPU::AR_X, + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg, + R600::AR_X, OffsetReg); - setImmOperand(*MOVA, AMDGPU::OpName::write, 0); - MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, + setImmOperand(*MOVA, R600::OpName::write, 0); + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV, ValueReg, AddrReg) - .addReg(AMDGPU::AR_X, + .addReg(R600::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1); + setImmOperand(*Mov, R600::OpName::src0_rel, 1); return Mov; } @@ -1265,7 +1270,7 @@ //XXX: The r600g finalizer expects this to be 1, once we've moved the //scheduling to the backend, we can change the default to 0. 
MIB.addImm(1) // $last - .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel + .addReg(R600::PRED_SEL_OFF) // $pred_sel .addImm(0) // $literal .addImm(0); // $bank_swizzle @@ -1286,23 +1291,23 @@ static unsigned getSlotedOps(unsigned Op, unsigned Slot) { switch (Op) { - OPERAND_CASE(AMDGPU::OpName::update_exec_mask) - OPERAND_CASE(AMDGPU::OpName::update_pred) - OPERAND_CASE(AMDGPU::OpName::write) - OPERAND_CASE(AMDGPU::OpName::omod) - OPERAND_CASE(AMDGPU::OpName::dst_rel) - OPERAND_CASE(AMDGPU::OpName::clamp) - OPERAND_CASE(AMDGPU::OpName::src0) - OPERAND_CASE(AMDGPU::OpName::src0_neg) - OPERAND_CASE(AMDGPU::OpName::src0_rel) - OPERAND_CASE(AMDGPU::OpName::src0_abs) - OPERAND_CASE(AMDGPU::OpName::src0_sel) - OPERAND_CASE(AMDGPU::OpName::src1) - OPERAND_CASE(AMDGPU::OpName::src1_neg) - OPERAND_CASE(AMDGPU::OpName::src1_rel) - OPERAND_CASE(AMDGPU::OpName::src1_abs) - OPERAND_CASE(AMDGPU::OpName::src1_sel) - OPERAND_CASE(AMDGPU::OpName::pred_sel) + OPERAND_CASE(R600::OpName::update_exec_mask) + OPERAND_CASE(R600::OpName::update_pred) + OPERAND_CASE(R600::OpName::write) + OPERAND_CASE(R600::OpName::omod) + OPERAND_CASE(R600::OpName::dst_rel) + OPERAND_CASE(R600::OpName::clamp) + OPERAND_CASE(R600::OpName::src0) + OPERAND_CASE(R600::OpName::src0_neg) + OPERAND_CASE(R600::OpName::src0_rel) + OPERAND_CASE(R600::OpName::src0_abs) + OPERAND_CASE(R600::OpName::src0_sel) + OPERAND_CASE(R600::OpName::src1) + OPERAND_CASE(R600::OpName::src1_neg) + OPERAND_CASE(R600::OpName::src1_rel) + OPERAND_CASE(R600::OpName::src1_abs) + OPERAND_CASE(R600::OpName::src1_sel) + OPERAND_CASE(R600::OpName::pred_sel) default: llvm_unreachable("Wrong Operand"); } @@ -1313,39 +1318,39 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg) const { - assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); + assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented"); unsigned Opcode; if (ST.getGeneration() <= R600Subtarget::R700) - Opcode = AMDGPU::DOT4_r600; + Opcode = R600::DOT4_r600; else - Opcode = AMDGPU::DOT4_eg; + Opcode = R600::DOT4_eg; MachineBasicBlock::iterator I = MI; MachineOperand &Src0 = MI->getOperand( - getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src0, Slot))); MachineOperand &Src1 = MI->getOperand( - getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src1, Slot))); MachineInstr *MIB = buildDefaultInstruction( MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg()); static const unsigned Operands[14] = { - AMDGPU::OpName::update_exec_mask, - AMDGPU::OpName::update_pred, - AMDGPU::OpName::write, - AMDGPU::OpName::omod, - AMDGPU::OpName::dst_rel, - AMDGPU::OpName::clamp, - AMDGPU::OpName::src0_neg, - AMDGPU::OpName::src0_rel, - AMDGPU::OpName::src0_abs, - AMDGPU::OpName::src0_sel, - AMDGPU::OpName::src1_neg, - AMDGPU::OpName::src1_rel, - AMDGPU::OpName::src1_abs, - AMDGPU::OpName::src1_sel, + R600::OpName::update_exec_mask, + R600::OpName::update_pred, + R600::OpName::write, + R600::OpName::omod, + R600::OpName::dst_rel, + R600::OpName::clamp, + R600::OpName::src0_neg, + R600::OpName::src0_rel, + R600::OpName::src0_abs, + R600::OpName::src0_sel, + R600::OpName::src1_neg, + R600::OpName::src1_rel, + R600::OpName::src1_abs, + R600::OpName::src1_sel, }; MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), - getSlotedOps(AMDGPU::OpName::pred_sel, 
Slot))); - MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) + getSlotedOps(R600::OpName::pred_sel, Slot))); + MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel)) .setReg(MO.getReg()); for (unsigned i = 0; i < 14; i++) { @@ -1362,16 +1367,16 @@ MachineBasicBlock::iterator I, unsigned DstReg, uint64_t Imm) const { - MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, - AMDGPU::ALU_LITERAL_X); - setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm); + MachineInstr *MovImm = buildDefaultInstruction(BB, I, R600::MOV, DstReg, + R600::ALU_LITERAL_X); + setImmOperand(*MovImm, R600::OpName::literal, Imm); return MovImm; } MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned DstReg, unsigned SrcReg) const { - return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); + return buildDefaultInstruction(*MBB, I, R600::MOV, DstReg, SrcReg); } int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { @@ -1379,7 +1384,7 @@ } int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { - return AMDGPU::getNamedOperandIdx(Opcode, Op); + return R600::getNamedOperandIdx(Opcode, Op); } void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op, @@ -1406,25 +1411,25 @@ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; switch (Flag) { case MO_FLAG_CLAMP: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp); + FlagIndex = getOperandIdx(MI, R600::OpName::clamp); break; case MO_FLAG_MASK: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write); + FlagIndex = getOperandIdx(MI, R600::OpName::write); break; case MO_FLAG_NOT_LAST: case MO_FLAG_LAST: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last); + FlagIndex = getOperandIdx(MI, R600::OpName::last); break; case MO_FLAG_NEG: switch (SrcIdx) { case 0: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg); + FlagIndex = getOperandIdx(MI, R600::OpName::src0_neg); break; case 1: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg); + FlagIndex = getOperandIdx(MI, R600::OpName::src1_neg); break; case 2: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg); + FlagIndex = getOperandIdx(MI, R600::OpName::src2_neg); break; } break; @@ -1435,10 +1440,10 @@ (void)IsOP3; switch (SrcIdx) { case 0: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs); + FlagIndex = getOperandIdx(MI, R600::OpName::src0_abs); break; case 1: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs); + FlagIndex = getOperandIdx(MI, R600::OpName::src1_abs); break; } break; @@ -1499,15 +1504,15 @@ switch (Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: - return AMDGPUASI.PRIVATE_ADDRESS; + return ST.getAMDGPUAS().PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: - return AMDGPUASI.CONSTANT_ADDRESS; + return ST.getAMDGPUAS().CONSTANT_ADDRESS; } llvm_unreachable("Invalid pseudo source kind"); - return AMDGPUASI.PRIVATE_ADDRESS; + return ST.getAMDGPUAS().PRIVATE_ADDRESS; } Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -18,13 +18,13 @@ class R600WrapperInst pattern = []> : AMDGPUInst, PredicateControl { let SubtargetPredicate = isR600toCayman; 
+ let Namespace = "R600"; } class InstR600ISA pattern = []> : InstR600 { - let Namespace = "AMDGPU"; } def MEMxi : Operand { @@ -86,6 +86,12 @@ def R600_Pred : PredicateOperand; +let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, + usesCustomInserter = 1, Namespace = "R600" in { + def RETURN : ILFormat<(outs), (ins variable_ops), + "RETURN", [(AMDGPUendpgm)] + >; +} let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -219,34 +225,6 @@ } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 -def TEX_SHADOW : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || TType == 13; - }] ->; - -def TEX_RECT : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 5; - }] ->; - -def TEX_ARRAY : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 9 || TType == 10 || TType == 16; - }] ->; - -def TEX_SHADOW_ARRAY : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 11 || TType == 12 || TType == 17; - }] ->; - class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, dag outs, dag ins, string asm, list pattern> : InstR600ISA , @@ -357,6 +335,8 @@ // R600 SDNodes //===----------------------------------------------------------------------===// +let Namespace = "R600" in { + def INTERP_PAIR_XY : AMDGPUShaderInst < (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), @@ -369,6 +349,8 @@ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", []>; +} + def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPVariadic] @@ -416,11 +398,15 @@ // Interpolation Instructions //===----------------------------------------------------------------------===// +let Namespace = "R600" in { + def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst">; +} + def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; } @@ -660,7 +646,7 @@ let isCodeGenOnly = 1, isPseudo = 1 in { -let usesCustomInserter = 1 in { +let Namespace = "R600", usesCustomInserter = 1 in { class FABS : AMDGPUShaderInst < (outs rc:$dst), @@ -792,7 +778,9 @@ (ins immType:$imm), "", [] ->; +> { + let Namespace = "R600"; +} } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 @@ -1007,7 +995,7 @@ } -let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in { class R600_VEC2OP pattern> : InstR600 <(outs R600_Reg32:$dst), (ins // Slot X UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X, @@ -1326,7 +1314,9 @@ // Regist loads and stores - for indirect addressing //===----------------------------------------------------------------------===// +let Namespace = "R600" in { defm R600_ : RegisterLoadStore ; +} // Hardcode channel to 0 // NOTE: LSHR is not available here. 
LSHR is per family instruction @@ -1378,11 +1368,12 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { -def MASK_WRITE : AMDGPUShaderInst < +def MASK_WRITE : InstR600 < (outs), (ins R600_Reg32:$src), "MASK_WRITE $src", - [] + [], + NullALU >; } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 @@ -1413,7 +1404,7 @@ // Constant Buffer Addressing Support //===----------------------------------------------------------------------===// -let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in { def CONST_COPY : Instruction { let OutOperandList = (outs R600_Reg32:$dst); let InOperandList = (ins i32imm:$src); @@ -1536,23 +1527,6 @@ //===---------------------------------------------------------------------===// // Flow and Program control Instructions //===---------------------------------------------------------------------===// -class ILFormat pattern> -: Instruction { - - let Namespace = "AMDGPU"; - dag OutOperandList = outs; - dag InOperandList = ins; - let Pattern = pattern; - let AsmString = !strconcat(asmstr, "\n"); - let isPseudo = 1; - let Itinerary = NullALU; - bit hasIEEEFlag = 0; - bit hasZeroOpFlag = 0; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let isCodeGenOnly = 1; -} multiclass BranchConditional { def _i32 : ILFormat<(outs), @@ -1584,23 +1558,14 @@ // Custom Inserter for Branches and returns, this eventually will be a // separate pass //===---------------------------------------------------------------------===// -let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { +let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1, + Namespace = "R600" in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), "; Pseudo unconditional branch instruction", [(br bb:$target)]>; defm BRANCH_COND : BranchConditional; } -//===---------------------------------------------------------------------===// -// Return instruction -//===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, - usesCustomInserter = 1 in { - def RETURN : ILFormat<(outs), (ins variable_ops), - "RETURN", [(AMDGPUendpgm)] - >; -} - //===----------------------------------------------------------------------===// // Branch Instructions //===----------------------------------------------------------------------===// @@ -1731,7 +1696,7 @@ // KIL Patterns def KIL : R600Pat < - (int_AMDGPU_kill f32:$src0), + (int_r600_kill f32:$src0), (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; Index: lib/Target/AMDGPU/R600MachineScheduler.cpp =================================================================== --- lib/Target/AMDGPU/R600MachineScheduler.cpp +++ lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -162,7 +162,7 @@ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), E = SU->getInstr()->operands_end(); It != E; ++It) { MachineOperand &MO = *It; - if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) ++CurEmitted; } } @@ -181,7 +181,7 @@ static bool isPhysicalRegCopy(MachineInstr *MI) { - if (MI->getOpcode() != AMDGPU::COPY) + if (MI->getOpcode() != R600::COPY) return false; return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); @@ -224,14 +224,14 @@ return AluTrans; switch (MI->getOpcode()) { - case AMDGPU::PRED_X: + case R600::PRED_X: return AluPredX; - case AMDGPU::INTERP_PAIR_XY: - case 
AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::DOT_4: return AluT_XYZW; - case AMDGPU::COPY: + case R600::COPY: if (MI->getOperand(1).isUndef()) { // MI will become a KILL, don't consider it in scheduling return AluDiscarded; } @@ -246,7 +246,7 @@ if(TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()) || TII->isReductionOp(MI->getOpcode()) || - MI->getOpcode() == AMDGPU::GROUP_BARRIER) { + MI->getOpcode() == R600::GROUP_BARRIER) { return AluT_XYZW; } @@ -257,13 +257,13 @@ // Is the result already assigned to a channel? unsigned DestSubReg = MI->getOperand(0).getSubReg(); switch (DestSubReg) { - case AMDGPU::sub0: + case R600::sub0: return AluT_X; - case AMDGPU::sub1: + case R600::sub1: return AluT_Y; - case AMDGPU::sub2: + case R600::sub2: return AluT_Z; - case AMDGPU::sub3: + case R600::sub3: return AluT_W; default: break; @@ -271,16 +271,16 @@ // Is the result already a member of an X/Y/Z/W class? unsigned DestReg = MI->getOperand(0).getReg(); - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || - regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) || + regBelongsToClass(DestReg, &R600::R600_AddrRegClass)) return AluT_X; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass)) return AluT_Y; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass)) return AluT_Z; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass)) return AluT_W; - if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) + if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass)) return AluT_XYZW; // LDS src registers cannot be used in the Trans slot.
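The getAluKind changes above come down to a fixed mapping from a result's channel (X/Y/Z/W, or a full 128-bit vector) to an ALU slot. A minimal standalone sketch of that mapping follows; AluKind, slotForChannel, and main are illustrative stand-ins written for this note, not LLVM APIs and not part of the patch.

```cpp
// Standalone sketch (illustration only): models the channel-to-slot mapping
// used by the scheduler hunks above.
#include <cassert>
#include <cstdio>

enum AluKind { AluT_X, AluT_Y, AluT_Z, AluT_W, AluT_XYZW };

// Hypothetical helper: channels 0..3 correspond to the X/Y/Z/W slots; a
// 128-bit vector result occupies all four slots at once.
static AluKind slotForChannel(unsigned Chan, bool IsVector) {
  if (IsVector)
    return AluT_XYZW;
  switch (Chan) {
  case 0: return AluT_X;
  case 1: return AluT_Y;
  case 2: return AluT_Z;
  case 3: return AluT_W;
  }
  assert(false && "invalid channel");
  return AluT_X;
}

int main() {
  std::printf("chan 2 -> %d\n", slotForChannel(2, false)); // prints 2 (AluT_Z)
  std::printf("vector -> %d\n", slotForChannel(0, true));  // prints 4 (AluT_XYZW)
}
```

The real pass additionally falls back to register-class membership when no subregister index is present, as the hunk above shows.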
@@ -301,13 +301,13 @@ } switch (Opcode) { - case AMDGPU::PRED_X: - case AMDGPU::COPY: - case AMDGPU::CONST_COPY: - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: + case R600::PRED_X: + case R600::COPY: + case R600::CONST_COPY: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::DOT_4: return IDAlu; default: return IDOther; @@ -353,7 +353,7 @@ } void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { - int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst); if (DstIndex == -1) { return; } @@ -370,16 +370,16 @@ // Constrains the regclass of DestReg to assign it to Slot switch (Slot) { case 0: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass); break; case 1: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass); break; case 2: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass); break; case 3: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass); break; } } Index: lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp =================================================================== --- lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -79,7 +79,7 @@ std::vector UndefReg; RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { - assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); + assert(MI->getOpcode() == R600::REG_SEQUENCE); for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { MachineOperand &MO = Instr->getOperand(i); unsigned Chan = Instr->getOperand(i + 1).getImm(); @@ -159,8 +159,8 @@ if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) return true; switch (MI.getOpcode()) { - case AMDGPU::R600_ExportSwz: - case AMDGPU::EG_ExportSwz: + case R600::R600_ExportSwz: + case R600::EG_ExportSwz: return true; default: return false; @@ -213,12 +213,12 @@ std::vector UpdatedUndef = BaseRSI->UndefReg; for (DenseMap::iterator It = RSI->RegToChan.begin(), E = RSI->RegToChan.end(); It != E; ++It) { - unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); unsigned SubReg = (*It).first; unsigned Swizzle = (*It).second; unsigned Chan = getReassignedChan(RemapChan, Swizzle); - MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), + MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG), DstReg) .addReg(SrcVec) .addReg(SubReg) @@ -234,7 +234,7 @@ SrcVec = DstReg; } MachineInstr *NewMI = - BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); + BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec); LLVM_DEBUG(dbgs() << " ->"; NewMI->dump();); LLVM_DEBUG(dbgs() << " Updating Swizzle:\n"); @@ -354,7 +354,7 @@ for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); MII != MIIE; ++MII) { MachineInstr &MI = *MII; - if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) { + if (MI.getOpcode() != R600::REG_SEQUENCE) { if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { unsigned Reg = MI.getOperand(1).getReg(); for 
(MachineRegisterInfo::def_instr_iterator Index: lib/Target/AMDGPU/R600Packetizer.cpp =================================================================== --- lib/Target/AMDGPU/R600Packetizer.cpp +++ lib/Target/AMDGPU/R600Packetizer.cpp @@ -84,39 +84,39 @@ LastDstChan = BISlot; if (TII->isPredicated(*BI)) continue; - int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); + int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) continue; - int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); + int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); if (DstIdx == -1) { continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); if (isTrans || TII->isTransOnly(*BI)) { - Result[Dst] = AMDGPU::PS; + Result[Dst] = R600::PS; continue; } - if (BI->getOpcode() == AMDGPU::DOT4_r600 || - BI->getOpcode() == AMDGPU::DOT4_eg) { - Result[Dst] = AMDGPU::PV_X; + if (BI->getOpcode() == R600::DOT4_r600 || + BI->getOpcode() == R600::DOT4_eg) { + Result[Dst] = R600::PV_X; continue; } - if (Dst == AMDGPU::OQAP) { + if (Dst == R600::OQAP) { continue; } unsigned PVReg = 0; switch (TRI.getHWRegChan(Dst)) { case 0: - PVReg = AMDGPU::PV_X; + PVReg = R600::PV_X; break; case 1: - PVReg = AMDGPU::PV_Y; + PVReg = R600::PV_Y; break; case 2: - PVReg = AMDGPU::PV_Z; + PVReg = R600::PV_Z; break; case 3: - PVReg = AMDGPU::PV_W; + PVReg = R600::PV_W; break; default: llvm_unreachable("Invalid Chan"); @@ -129,9 +129,9 @@ void substitutePV(MachineInstr &MI, const DenseMap &PVs) const { unsigned Ops[] = { - AMDGPU::OpName::src0, - AMDGPU::OpName::src1, - AMDGPU::OpName::src2 + R600::OpName::src0, + R600::OpName::src1, + R600::OpName::src2 }; for (unsigned i = 0; i < 3; i++) { int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); @@ -171,7 +171,7 @@ return true; if (!TII->isALUInstr(MI.getOpcode())) return true; - if (MI.getOpcode() == AMDGPU::GROUP_BARRIER) + if (MI.getOpcode() == R600::GROUP_BARRIER) return true; // XXX: This can be removed once the packetizer properly handles all the // LDS instruction group restrictions. @@ -185,8 +185,8 @@ if (getSlot(*MII) == getSlot(*MIJ)) ConsideredInstUsesAlreadyWrittenVectorElement = true; // Does MII and MIJ share the same pred_sel ? 
- int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), - OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); + int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), + OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; if (PredI != PredJ) @@ -220,7 +220,7 @@ } void setIsLastBit(MachineInstr *MI, unsigned Bit) const { - unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); + unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); MI->getOperand(LastOp).setImm(Bit); } @@ -301,11 +301,11 @@ for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { MachineInstr *MI = CurrentPacketMIs[i]; unsigned Op = TII->getOperandIdx(MI->getOpcode(), - AMDGPU::OpName::bank_swizzle); + R600::OpName::bank_swizzle); MI->getOperand(Op).setImm(BS[i]); } unsigned Op = - TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle); + TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); MI.getOperand(Op).setImm(BS.back()); if (!CurrentPacketMIs.empty()) setIsLastBit(CurrentPacketMIs.back(), 0); @@ -334,6 +334,7 @@ // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + assert(Packetizer.getResourceTracker()->getInstrItins()); if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) return false; @@ -353,8 +354,8 @@ MachineBasicBlock::iterator End = MBB->end(); MachineBasicBlock::iterator MI = MBB->begin(); while (MI != End) { - if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF || - (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { + if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || + (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { MachineBasicBlock::iterator DeleteMI = MI; ++MI; MBB->erase(DeleteMI); Index: lib/Target/AMDGPU/R600Processors.td =================================================================== --- lib/Target/AMDGPU/R600Processors.td +++ lib/Target/AMDGPU/R600Processors.td @@ -7,6 +7,62 @@ // //===----------------------------------------------------------------------===// +class SubtargetFeatureFetchLimit : + SubtargetFeature <"fetch"#Value, + "TexVTXClauseSize", + Value, + "Limit the maximum number of fetches in a clause to "#Value +>; + +def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", + "R600ALUInst", + "false", + "Older version of ALU instructions encoding" +>; + +def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">; +def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">; + +def FeatureVertexCache : SubtargetFeature<"HasVertexCache", + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache" +>; + +def FeatureCaymanISA : SubtargetFeature<"caymanISA", + "CaymanISA", + "true", + "Use Cayman ISA" +>; + +def FeatureCFALUBug : SubtargetFeature<"cfalubug", + "CFALUBug", + "true", + "GPU has CF_ALU bug" +>; + +class R600SubtargetFeatureGeneration Implies> : + SubtargetFeatureGeneration ; + +def FeatureR600 : R600SubtargetFeatureGeneration<"R600", + [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] +>; + +def FeatureR700 : R600SubtargetFeatureGeneration<"R700", + [FeatureFetchLimit16, FeatureLocalMemorySize0] +>; + +def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", + [FeatureFetchLimit16, FeatureLocalMemorySize32768] +>; + +def FeatureNorthernIslands : 
R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS", + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] +>; + + //===----------------------------------------------------------------------===// // Radeon HD 2000/3000 Series (R600). //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/R600RegisterInfo.h =================================================================== --- lib/Target/AMDGPU/R600RegisterInfo.h +++ lib/Target/AMDGPU/R600RegisterInfo.h @@ -15,13 +15,14 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H #define LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H -#include "AMDGPURegisterInfo.h" +#define GET_REGINFO_HEADER +#include "R600GenRegisterInfo.inc" namespace llvm { class AMDGPUSubtarget; -struct R600RegisterInfo final : public AMDGPURegisterInfo { +struct R600RegisterInfo final : public R600GenRegisterInfo { RegClassWeight RCW; R600RegisterInfo(); @@ -49,6 +50,8 @@ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + + void reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const; }; } // End namespace llvm Index: lib/Target/AMDGPU/R600RegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/R600RegisterInfo.cpp +++ lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -21,34 +21,37 @@ using namespace llvm; -R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() { +R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) { RCW.RegWeight = 0; RCW.WeightLimit = 0; } +#define GET_REGINFO_TARGET_DESC +#include "R600GenRegisterInfo.inc" + BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const R600Subtarget &ST = MF.getSubtarget(); const R600InstrInfo *TII = ST.getInstrInfo(); - reserveRegisterTuples(Reserved, AMDGPU::ZERO); - reserveRegisterTuples(Reserved, AMDGPU::HALF); - reserveRegisterTuples(Reserved, AMDGPU::ONE); - reserveRegisterTuples(Reserved, AMDGPU::ONE_INT); - reserveRegisterTuples(Reserved, AMDGPU::NEG_HALF); - reserveRegisterTuples(Reserved, AMDGPU::NEG_ONE); - reserveRegisterTuples(Reserved, AMDGPU::PV_X); - reserveRegisterTuples(Reserved, AMDGPU::ALU_LITERAL_X); - reserveRegisterTuples(Reserved, AMDGPU::ALU_CONST); - reserveRegisterTuples(Reserved, AMDGPU::PREDICATE_BIT); - reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_OFF); - reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ZERO); - reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ONE); - reserveRegisterTuples(Reserved, AMDGPU::INDIRECT_BASE_ADDR); - - for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(), - E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) { + reserveRegisterTuples(Reserved, R600::ZERO); + reserveRegisterTuples(Reserved, R600::HALF); + reserveRegisterTuples(Reserved, R600::ONE); + reserveRegisterTuples(Reserved, R600::ONE_INT); + reserveRegisterTuples(Reserved, R600::NEG_HALF); + reserveRegisterTuples(Reserved, R600::NEG_ONE); + reserveRegisterTuples(Reserved, R600::PV_X); + reserveRegisterTuples(Reserved, R600::ALU_LITERAL_X); + reserveRegisterTuples(Reserved, R600::ALU_CONST); + reserveRegisterTuples(Reserved, R600::PREDICATE_BIT); + reserveRegisterTuples(Reserved, R600::PRED_SEL_OFF); + reserveRegisterTuples(Reserved, R600::PRED_SEL_ZERO); + reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE); + reserveRegisterTuples(Reserved, R600::INDIRECT_BASE_ADDR); + + for (TargetRegisterClass::iterator I = 
R600::R600_AddrRegClass.begin(), + E = R600::R600_AddrRegClass.end(); I != E; ++I) { reserveRegisterTuples(Reserved, *I); } @@ -58,7 +61,7 @@ } // Dummy to not crash RegisterClassInfo. -static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; +static const MCPhysReg CalleeSavedReg = R600::NoRegister; const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs( const MachineFunction *) const { @@ -66,7 +69,7 @@ } unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return AMDGPU::NoRegister; + return R600::NoRegister; } unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { @@ -81,7 +84,7 @@ MVT VT) const { switch(VT.SimpleTy) { default: - case MVT::i32: return &AMDGPU::R600_TReg32RegClass; + case MVT::i32: return &R600::R600_TReg32RegClass; } } @@ -94,9 +97,9 @@ assert(!TargetRegisterInfo::isVirtualRegister(Reg)); switch (Reg) { - case AMDGPU::OQAP: - case AMDGPU::OQBP: - case AMDGPU::AR_X: + case R600::OQAP: + case R600::OQBP: + case R600::AR_X: return false; default: return true; @@ -109,3 +112,10 @@ RegScavenger *RS) const { llvm_unreachable("Subroutines not supported yet"); } + +void R600RegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { + MCRegAliasIterator R(Reg, this, true); + + for (; R.isValid(); ++R) + Reserved.set(*R); +} Index: lib/Target/AMDGPU/R600RegisterInfo.td =================================================================== --- lib/Target/AMDGPU/R600RegisterInfo.td +++ lib/Target/AMDGPU/R600RegisterInfo.td @@ -245,7 +245,7 @@ (add V0123_W, V0123_Z, V0123_Y, V0123_X) >; -def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, +def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64, (add (sequence "T%u_XY", 0, 63))>; def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, Index: lib/Target/AMDGPU/R700Instructions.td =================================================================== --- lib/Target/AMDGPU/R700Instructions.td +++ lib/Target/AMDGPU/R700Instructions.td @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">; +def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">; let Predicates = [isR700] in { def SIN_r700 : SIN_Common<0x6E>; Index: lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- lib/Target/AMDGPU/SIFoldOperands.cpp +++ lib/Target/AMDGPU/SIFoldOperands.cpp @@ -76,7 +76,7 @@ MachineRegisterInfo *MRI; const SIInstrInfo *TII; const SIRegisterInfo *TRI; - const SISubtarget *ST; + const AMDGPUSubtarget *ST; void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, @@ -972,7 +972,7 @@ return false; MRI = &MF.getRegInfo(); - ST = &MF.getSubtarget(); + ST = &MF.getSubtarget(); TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -22,6 +22,9 @@ namespace llvm { class SITargetLowering final : public AMDGPUTargetLowering { +private: + const SISubtarget *Subtarget; + SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- 
lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -113,7 +113,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, const SISubtarget &STI) - : AMDGPUTargetLowering(TM, STI) { + : AMDGPUTargetLowering(TM, STI), + Subtarget(&STI) { addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); @@ -145,7 +146,7 @@ addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass); } - computeRegisterProperties(STI.getRegisterInfo()); + computeRegisterProperties(Subtarget->getRegisterInfo()); // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); @@ -312,7 +313,7 @@ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand); - if (getSubtarget()->hasFlatAddressSpace()) { + if (Subtarget->hasFlatAddressSpace()) { setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); } @@ -325,6 +326,44 @@ setOperationAction(ISD::TRAP, MVT::Other, Custom); setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); + if (Subtarget->has16BitInsts()) { + setOperationAction(ISD::FLOG, MVT::f16, Custom); + setOperationAction(ISD::FLOG10, MVT::f16, Custom); + } + + // v_mad_f32 does not support denormals according to some sources. + if (!Subtarget->hasFP32Denormals()) + setOperationAction(ISD::FMAD, MVT::f32, Legal); + + if (!Subtarget->hasBFI()) { + // fcopysign can be done in a single instruction with BFI. + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + } + + if (!Subtarget->hasBCNT(32)) + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + + if (!Subtarget->hasBCNT(64)) + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + if (Subtarget->hasFFBH()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + + if (Subtarget->hasFFBL()) + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); + + // We only really have 32-bit BFE instructions (and 16-bit on VI). + // + // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any + // effort to match them now. We want this to be false for i64 cases when the + // extraction isn't restricted to the upper or lower half. Ideally we would + // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that + // span the midpoint are probably relatively rare, so don't worry about them + // for now. + if (Subtarget->hasBFE()) + setHasExtractBitsInsn(true); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -332,6 +371,11 @@ setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); + } else { + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); + setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); } setOperationAction(ISD::FFLOOR, MVT::f64, Legal); @@ -575,10 +619,15 @@ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX); setSchedulingPreference(Sched::RegPressure); + + // SI at least has hardware support for floating point exceptions, but no way + // of using or handling them is implemented. 
They are also optional in OpenCL + // (Section 7.3) + setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); } const SISubtarget *SITargetLowering::getSubtarget() const { - return static_cast(Subtarget); + return Subtarget; } //===----------------------------------------------------------------------===// @@ -1944,8 +1993,7 @@ // FIXME: Does sret work properly? if (!Info->isEntryFunction()) { - const SIRegisterInfo *TRI - = static_cast(Subtarget)->getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { @@ -2047,8 +2095,7 @@ SelectionDAG &DAG = CLI.DAG; const SDLoc &DL = CLI.DL; - const SISubtarget *ST = getSubtarget(); - const SIRegisterInfo *TRI = ST->getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); auto &ArgUsageInfo = DAG.getPass()->getAnalysis(); @@ -2486,7 +2533,7 @@ // Add a register mask operand representing the call-preserved registers. - const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); + auto *TRI = static_cast(Subtarget->getRegisterInfo()); const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -7695,8 +7742,7 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - const SISubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); if (Info->isEntryFunction()) { // Callable functions have fixed registers used for stack access. Index: lib/Target/AMDGPU/SIInsertWaitcnts.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -930,8 +930,7 @@ // All waits must be resolved at call return. // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. - if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || + if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || MI.getOpcode() == AMDGPU::S_SETPC_B64_return) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { @@ -1127,7 +1126,7 @@ // TODO: Remove this work-around, enable the assert for Bug 457939 // after fixing the scheduler. Also, the Shader Compiler code is // independent of target. - if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) { + if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) { if (ScoreBrackets->getScoreLB(LGKM_CNT) < ScoreBrackets->getScoreUB(LGKM_CNT) && ScoreBrackets->hasPendingSMEM()) { @@ -1712,7 +1711,7 @@ if (ScoreBrackets->getScoreLB(LGKM_CNT) < ScoreBrackets->getScoreUB(LGKM_CNT) && ScoreBrackets->hasPendingSMEM()) { - if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) + if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) VCCZBugWorkAround = true; } } Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -21,7 +21,7 @@ class InstSI pattern = []> : - AMDGPUInst, PredicateControl { + AMDGPUInst, GCNPredicateControl { let SubtargetPredicate = isGCN; // Low bits - basic encoding information. 
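The SITargetLowering constructor hunks above (SIISelLowering.cpp) repeat one pattern: a subtarget feature bit picks how a generic operation is handled. Below is a small standalone sketch of that selection under assumed feature flags; the Action enum, Features struct, and helper names are invented for illustration and are not the real TargetLowering or subtarget interfaces.

```cpp
// Standalone sketch (illustration only): feature-gated selection of an
// operation's handling, in the spirit of the setOperationAction calls above.
// In the real constructor the "keep default" cases simply omit the call.
#include <cstdio>

enum class Action { KeepDefault, Expand };

struct Features {          // assumed subtarget feature bits, illustration only
  bool HasBFI = true;      // bitfield insert available (fcopysign in one op)
  bool HasBCNT32 = true;   // 32-bit population count available
};

// Without BFI, fcopysign has to be expanded into bit arithmetic; with it the
// default handling is kept (a BFI pattern matches it later).
static Action actionForFCopySignF32(const Features &F) {
  return F.HasBFI ? Action::KeepDefault : Action::Expand;
}

// Without a hardware popcount, i32 ctpop is likewise expanded.
static Action actionForCtpopI32(const Features &F) {
  return F.HasBCNT32 ? Action::KeepDefault : Action::Expand;
}

int main() {
  Features Full;            // example subtarget with both features
  Features Minimal;         // example subtarget with neither feature
  Minimal.HasBFI = false;
  Minimal.HasBCNT32 = false;
  std::printf("fcopysign f32: %d vs %d\n", (int)actionForFCopySignF32(Full),
              (int)actionForFCopySignF32(Minimal));
  std::printf("ctpop i32    : %d vs %d\n", (int)actionForCtpopI32(Full),
              (int)actionForCtpopI32(Minimal));
}
```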
Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -31,6 +31,9 @@ #include #include +#define GET_INSTRINFO_HEADER +#include "AMDGPUGenInstrInfo.inc" + namespace llvm { class APInt; @@ -39,7 +42,7 @@ class SISubtarget; class TargetRegisterClass; -class SIInstrInfo final : public AMDGPUInstrInfo { +class SIInstrInfo final : public AMDGPUGenInstrInfo { private: const SIRegisterInfo RI; const SISubtarget &ST; @@ -163,7 +166,10 @@ bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, MachineInstr &SecondLdSt, unsigned BaseReg2, - unsigned NumLoads) const final; + unsigned NumLoads) const override; + + bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, + int64_t Offset1, unsigned NumLoads) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, @@ -879,6 +885,12 @@ static bool isLegalMUBUFImmOffset(unsigned Imm) { return isUInt<12>(Imm); } + + /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. + int pseudoToMCOpcode(int Opcode) const; + }; namespace AMDGPU { Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -14,6 +14,7 @@ #include "SIInstrInfo.h" #include "AMDGPU.h" +#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" #include "SIDefines.h" @@ -63,6 +64,20 @@ using namespace llvm; +#define GET_INSTRINFO_CTOR_DTOR +#include "AMDGPUGenInstrInfo.inc" + +namespace llvm { +namespace AMDGPU { +#define GET_RSRCINTRINSIC_IMPL +#include "AMDGPUGenSearchableTables.inc" + +#define GET_D16IMAGEDIMINTRINSIC_IMPL +#include "AMDGPUGenSearchableTables.inc" +} +} + + // Must be at least 4 to be able to branch over minimum unconditional branch // code. This is only for making it possible to write reasonably small tests for // long branches. @@ -71,7 +86,8 @@ cl::desc("Restrict range of branch instructions (DEBUG)")); SIInstrInfo::SIInstrInfo(const SISubtarget &ST) - : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {} + : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), + RI(ST), ST(ST) {} //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks @@ -438,6 +454,28 @@ return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } +// FIXME: This behaves strangely. If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will // be clustered as expected. It should really split into two batches of 16 stores. // // Loads are clustered until this returns false, rather than trying to schedule // groups of stores. This also means we have to deal with saying different // address space loads should be clustered, and ones which might cause bank // conflicts. // // This might be deprecated so it might not be worth that much effort to fix.
+bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, + int64_t Offset0, int64_t Offset1, + unsigned NumLoads) const { + assert(Offset1 > Offset0 && + "Second offset should be larger than first offset!"); + // If we have less than 16 loads in a row, and the offsets are within 64 + // bytes, then schedule together. + + // A cacheline is 64 bytes (for global memory). + return (NumLoads <= 16 && (Offset1 - Offset0) < 64); +} + static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, @@ -998,7 +1036,7 @@ unsigned FrameOffset, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - const SISubtarget &ST = MF->getSubtarget(); + const AMDGPUSubtarget &ST = MF->getSubtarget(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); unsigned WavefrontSize = ST.getWavefrontSize(); @@ -1134,7 +1172,7 @@ MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); switch (MI.getOpcode()) { - default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + default: return TargetInstrInfo::expandPostRAPseudo(MI); case AMDGPU::S_MOV_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. @@ -1900,16 +1938,16 @@ switch(Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: - return AMDGPUASI.PRIVATE_ADDRESS; + return ST.getAMDGPUAS().PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: - return AMDGPUASI.CONSTANT_ADDRESS; + return ST.getAMDGPUAS().CONSTANT_ADDRESS; } - return AMDGPUASI.FLAT_ADDRESS; + return ST.getAMDGPUAS().FLAT_ADDRESS; } static void removeModOperands(MachineInstr &MI) { @@ -4649,7 +4687,7 @@ return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && - (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); + (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -4768,7 +4806,7 @@ return true; for (const MachineMemOperand *MMO : MI.memoperands()) { - if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) + if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS) return true; } return false; @@ -4948,3 +4986,56 @@ const auto RCID = MI.getDesc().OpInfo[Idx].RegClass; return RCID == AMDGPU::SReg_128RegClassID; } + +// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td +enum SIEncodingFamily { + SI = 0, + VI = 1, + SDWA = 2, + SDWA9 = 3, + GFX80 = 4, + GFX9 = 5 +}; + +static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) { + switch (ST.getGeneration()) { + case SISubtarget::SOUTHERN_ISLANDS: + case SISubtarget::SEA_ISLANDS: + return SIEncodingFamily::SI; + case SISubtarget::VOLCANIC_ISLANDS: + case SISubtarget::GFX9: + return SIEncodingFamily::VI; + } + llvm_unreachable("Unknown subtarget generation!"); +} + +int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { + SIEncodingFamily Gen = subtargetEncodingFamily(ST); + + if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && + ST.getGeneration() >= SISubtarget::GFX9) + Gen = SIEncodingFamily::GFX9; + + if (get(Opcode).TSFlags & SIInstrFlags::SDWA) + Gen = ST.getGeneration() == 
SISubtarget::GFX9 ? SIEncodingFamily::SDWA9 + : SIEncodingFamily::SDWA; + // Adjust the encoding family to GFX80 for D16 buffer instructions when the + // subtarget has the UnpackedD16VMem feature. + // TODO: remove this when we discard GFX80 encoding. + if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16) + && !(get(Opcode).TSFlags & SIInstrFlags::MIMG)) + Gen = SIEncodingFamily::GFX80; + + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); + + // -1 means that Opcode is already a native instruction. + if (MCOp == -1) + return Opcode; + + // (uint16_t)-1 means that Opcode is a pseudo instruction that has + // no encoding in the given subtarget generation. + if (MCOp == (uint16_t)-1) + return -1; + + return MCOp; +} Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -17,6 +17,11 @@ def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; +class GCNPredicateControl : PredicateControl { + Predicate SIAssemblerPredicate = isSICI; + Predicate VIAssemblerPredicate = isVI; +} + // Except for the NONE field, this must be kept in sync with the // SIEncodingFamily enum in AMDGPUInstrInfo.cpp def SIEncodingFamily { Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -11,11 +11,10 @@ // that are not yet supported remain commented out. //===----------------------------------------------------------------------===// -class GCNPat : AMDGPUPat { +class GCNPat : Pat, GCNPredicateControl { let SubtargetPredicate = isGCN; } - include "VOPInstructions.td" include "SOPInstructions.td" include "SMInstructions.td" Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -21,6 +21,7 @@ namespace llvm { +class AMDGPUSubtarget; class LiveIntervals; class MachineRegisterInfo; class SISubtarget; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1232,8 +1232,6 @@ &AMDGPU::VReg_512RegClass, &AMDGPU::SReg_512RegClass, &AMDGPU::SCC_CLASSRegClass, - &AMDGPU::R600_Reg32RegClass, - &AMDGPU::R600_PredicateRegClass, &AMDGPU::Pseudo_SReg_32RegClass, &AMDGPU::Pseudo_SReg_128RegClass, }; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -93,7 +93,7 @@ /// \returns Maximum number of waves per execution unit for given subtarget \p /// Features without any kind of limitation. -unsigned getMaxWavesPerEU(const FeatureBitset &Features); +unsigned getMaxWavesPerEU(); /// \returns Maximum number of waves per execution unit for given subtarget \p /// Features and limited by given \p FlatWorkGroupSize.
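The tail of pseudoToMCOpcode above distinguishes two lookup sentinels that are easy to confuse: plain -1 (the opcode is already a native instruction) and (uint16_t)-1 (a pseudo with no encoding on this generation). A standalone sketch of just that handling, where getMCOpcodeForGen is a made-up stand-in for the TableGen-generated AMDGPU::getMCOpcode table:

```cpp
// Standalone sketch (illustration only) of the sentinel handling in
// pseudoToMCOpcode above.
#include <cstdint>
#include <cstdio>

// Made-up stand-in for the generated AMDGPU::getMCOpcode lookup.
static int getMCOpcodeForGen(int Opcode, int Gen);

// Mirrors the interpretation in the patch:
//   -1            -> Opcode is already a native instruction, return it as-is
//   (uint16_t)-1  -> pseudo with no encoding for this generation, return -1
//   anything else -> the resolved MC opcode
static int resolvePseudo(int Opcode, int Gen) {
  int MCOp = getMCOpcodeForGen(Opcode, Gen);
  if (MCOp == -1)
    return Opcode;
  if (MCOp == (uint16_t)-1)
    return -1;
  return MCOp;
}

// Toy lookup table, for demonstration only.
static int getMCOpcodeForGen(int Opcode, int Gen) {
  if (Opcode == 100)                       // pseudo: encoded only for Gen 0
    return Gen == 0 ? 0x1234 : (uint16_t)-1;
  return -1;                               // everything else is already native
}

int main() {
  std::printf("%d\n", resolvePseudo(100, 0)); // 4660: resolved MC opcode
  std::printf("%d\n", resolvePseudo(100, 1)); // -1: no encoding on this gen
  std::printf("%d\n", resolvePseudo(7, 0));   // 7: already a native opcode
}
```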
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -228,7 +228,7 @@ if (Features.test(FeatureGFX9)) return {9, 0, 0}; - if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) + if (Features.test(FeatureSouthernIslands)) return {0, 0, 0}; return {7, 0, 0}; } @@ -286,7 +286,7 @@ } unsigned getMaxWavesPerCU(const FeatureBitset &Features) { - return getMaxWavesPerEU(Features) * getEUsPerCU(Features); + return getMaxWavesPerEU() * getEUsPerCU(Features); } unsigned getMaxWavesPerCU(const FeatureBitset &Features, @@ -298,9 +298,7 @@ return 1; } -unsigned getMaxWavesPerEU(const FeatureBitset &Features) { - if (!Features.test(FeatureGCN)) - return 8; +unsigned getMaxWavesPerEU() { // FIXME: Need to take scratch memory into account. return 10; } @@ -356,7 +354,7 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU(Features)) + if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); @@ -400,7 +398,7 @@ unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU(Features)) + if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumVGPRs = alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), @@ -724,6 +722,8 @@ case node: return isGFX9(STI) ? node##_gfx9 : node##_vi; unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { + if (STI.getTargetTriple().getArch() == Triple::r600) + return Reg; MAP_REG2REG }
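The getMinNumSGPRs/getMinNumVGPRs hunks above keep their arithmetic and only swap the feature-dependent wave limit for the new constant getMaxWavesPerEU(). As a rough standalone illustration of that arithmetic (the register total below is an example value, not an authoritative hardware number):

```cpp
// Standalone sketch (illustration only) of the occupancy bound used above.
#include <cstdio>

constexpr unsigned MaxWavesPerEU = 10;   // now a plain constant, as in the patch

// TotalRegs / (WavesPerEU + 1) is the largest per-wave register count at which
// one *additional* wave would still fit, so it serves as the lower bound.
static unsigned minRegsFor(unsigned WavesPerEU, unsigned TotalRegs) {
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;                              // no lower bound is needed
  return TotalRegs / (WavesPerEU + 1);
}

int main() {
  const unsigned TotalVGPRs = 256;         // example register-file size only
  for (unsigned W = 1; W <= MaxWavesPerEU; ++W)
    std::printf("waves=%u -> lower bound %u regs\n", W, minRegsFor(W, TotalVGPRs));
}
```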