Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
@@ -34,6 +34,7 @@
 FunctionPass *createR600Packetizer();
 FunctionPass *createR600ControlFlowFinalizer();
 FunctionPass *createAMDGPUCFGStructurizerPass();
+FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
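AMDGPU.h now declares the R600 selector factory alongside the existing createAMDGPUISelDag. A caller picks whichever factory matches the subtarget family; as a minimal sketch (the wrapper below is hypothetical, purely for illustration, and not part of the patch):

  // Hypothetical convenience wrapper, shown for illustration only.
  // createAMDGPUISelDag is the existing GCN-flavored factory declared in
  // this same header; judging by its definition below, it takes the same
  // TargetMachine* and optimization-level parameters.
  FunctionPass *createISelDagForFamily(TargetMachine *TM,
                                       CodeGenOpt::Level OptLevel,
                                       bool IsR600) {
    return IsR600 ? createR600ISelDag(TM, OptLevel)
                  : createAMDGPUISelDag(TM, OptLevel);
  }

In practice the choice is made by the pass configurations: R600PassConfig::addInstSelector, in the AMDGPUTargetMachine.cpp hunk at the end of this patch, creates the R600 selector.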
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -88,6 +88,9 @@
   StringRef getPassName() const override;
   void PostprocessISelDAG() override;
 
+protected:
+  void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
 private:
   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
   bool isNoNanSrc(SDValue N) const;
@@ -106,8 +109,8 @@
   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                        SDValue& Offset);
-  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                        unsigned OffsetBits) const;
   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
@@ -207,10 +210,24 @@
   void SelectBRCOND(SDNode *N);
   void SelectATOMIC_CMP_SWAP(SDNode *N);
 
+protected:
 // Include the pieces autogenerated from the target description.
 #include "AMDGPUGenDAGISel.inc"
 };
 
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+public:
+  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
+      AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+  void Select(SDNode *N) override;
+
+  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+};
+
 }  // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
@@ -226,6 +243,13 @@
   return new AMDGPUDAGToDAGISel(TM, OptLevel);
 }
 
+/// \brief This pass converts a legalized DAG into an R600-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+                                      CodeGenOpt::Level OptLevel) {
+  return new R600DAGToDAGISel(TM, OptLevel);
+}
+
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
@@ -304,8 +328,7 @@
 }
 
 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
-      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
+  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
     return N;
 
   const SITargetLowering& Lowering =
@@ -359,6 +382,59 @@
   return false;
 }
 
+void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
+  unsigned Opc = N->getOpcode();
+  EVT VT = N->getValueType(0);
+  unsigned NumVectorElts = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+  SDLoc DL(N);
+  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+
+  if (NumVectorElts == 1) {
+    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
+                         RegClass);
+    return;
+  }
+
+  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+                                "supported yet");
+  // 16 = Max Num Vector Elements
+  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+  // 1 = Vector Register Class
+  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+
+  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+  bool IsRegSeq = true;
+  unsigned NOps = N->getNumOperands();
+  for (unsigned i = 0; i < NOps; i++) {
+    // XXX: Why is this here?
+    if (isa<RegisterSDNode>(N->getOperand(i))) {
+      IsRegSeq = false;
+      break;
+    }
+    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+    RegSeqArgs[1 + (2 * i) + 1] =
+        CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+                                  MVT::i32);
+  }
+  if (NOps != NumVectorElts) {
+    // Fill in the missing undef elements if this was a scalar_to_vector.
+    assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                   DL, EltVT);
+    for (unsigned i = NOps; i < NumVectorElts; ++i) {
+      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+      RegSeqArgs[1 + (2 * i) + 1] =
+          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
+    }
+  }
+
+  if (!IsRegSeq)
+    SelectCode(N);
+  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+}
+
 void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   unsigned int Opc = N->getOpcode();
   if (N->isMachineOpcode()) {
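The operand layout described by the comments in SelectBuildVector above is easiest to see on a concrete case. For a v4i32 (build_vector a, b, c, d), the loop fills NumVectorElts * 2 + 1 = 9 slots; here sub0..sub3 stand for whatever TRI->getSubRegFromChannel returns for channels 0 through 3 (illustration only, not code from the patch):

  // REG_SEQUENCE operands built for a v4i32 (build_vector a, b, c, d):
  //   RegSeqArgs[0] = RegClassID           // the vector register class
  //   RegSeqArgs[1] = a,  RegSeqArgs[2] = sub0
  //   RegSeqArgs[3] = b,  RegSeqArgs[4] = sub1
  //   RegSeqArgs[5] = c,  RegSeqArgs[6] = sub2
  //   RegSeqArgs[7] = d,  RegSeqArgs[8] = sub3
  // Element i's value sits in slot 1 + 2*i and its subregister index in the
  // slot right after it: the (value, subreg index) pairs from the comment.

A SCALAR_TO_VECTOR node supplies fewer operands than the vector has elements, which is what the NOps != NumVectorElts branch patches up with a single IMPLICIT_DEF value.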
@@ -381,8 +457,7 @@
   case ISD::SUB:
   case ISD::SUBC:
   case ISD::SUBE: {
-    if (N->getValueType(0) != MVT::i64 ||
-        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+    if (N->getValueType(0) != MVT::i64)
       break;
 
     SelectADD_SUB_I64(N);
@@ -403,10 +478,7 @@
   }
   case ISD::SCALAR_TO_VECTOR:
-  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
   case ISD::BUILD_VECTOR: {
-    unsigned RegClassID;
-    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
     EVT VT = N->getValueType(0);
     unsigned NumVectorElts = VT.getVectorNumElements();
     EVT EltVT = VT.getVectorElementType();
@@ -427,80 +499,12 @@
     }
 
     assert(EltVT.bitsEq(MVT::i32));
-
-    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
-    } else {
-      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
-      // that adds a 128 bits reg copy when going through TwoAddressInstructions
-      // pass. We want to avoid 128 bits copies as much as possible because they
-      // can't be bundled by our scheduler.
-      switch(NumVectorElts) {
-      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
-      case 4:
-        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
-          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
-        else
-          RegClassID = AMDGPU::R600_Reg128RegClassID;
-        break;
-      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
-      }
-    }
-
-    SDLoc DL(N);
-    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
-
-    if (NumVectorElts == 1) {
-      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
-                           RegClass);
-      return;
-    }
-
-    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
-                                  "supported yet");
-    // 16 = Max Num Vector Elements
-    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
-    // 1 = Vector Register Class
-    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
-
-    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
-    bool IsRegSeq = true;
-    unsigned NOps = N->getNumOperands();
-    for (unsigned i = 0; i < NOps; i++) {
-      // XXX: Why is this here?
-      if (isa<RegisterSDNode>(N->getOperand(i))) {
-        IsRegSeq = false;
-        break;
-      }
-      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
-      RegSeqArgs[1 + (2 * i) + 1] =
-          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
-                                    MVT::i32);
-    }
-
-    if (NOps != NumVectorElts) {
-      // Fill in the missing undef elements if this was a scalar_to_vector.
-      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
-
-      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                     DL, EltVT);
-      for (unsigned i = NOps; i < NumVectorElts; ++i) {
-        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
-        RegSeqArgs[1 + (2 * i) + 1] =
-            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
-      }
-    }
-
-    if (!IsRegSeq)
-      break;
-    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
+    SelectBuildVector(N, RegClassID);
     return;
   }
   case ISD::BUILD_PAIR: {
     SDValue RC, SubReg0, SubReg1;
-    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-      break;
-    }
     SDLoc DL(N);
     if (N->getValueType(0) == MVT::i128) {
       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
@@ -522,8 +526,7 @@
 
   case ISD::Constant:
   case ISD::ConstantFP: {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
-        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
       break;
 
     uint64_t Imm;
@@ -558,9 +561,6 @@
 
   case AMDGPUISD::BFE_I32:
   case AMDGPUISD::BFE_U32: {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
-      break;
-
     // There is a scalar version available, but unlike the vector version which
     // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
@@ -600,8 +600,7 @@
   case ISD::SRL:
   case ISD::SRA:
   case ISD::SIGN_EXTEND_INREG:
-    if (N->getValueType(0) != MVT::i32 ||
-        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+    if (N->getValueType(0) != MVT::i32)
      break;
 
     SelectS_BFE(N);
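The BFE hunk above keeps the existing comment: the scalar (SALU) form packs the bit offset and field width into one immediate, while the vector form takes them as separate operands. As a sketch of what that packing means, assuming the usual offset-in-the-low-bits, width-at-bit-16 layout (the authoritative encoding lives in this file's S_BFE selection code, not in this patch):

  // Sketch only: pack an (offset, width) pair the way the scalar BFE
  // immediate expects it. The field positions here are an assumption
  // made for illustration.
  static uint32_t packSBFEImm(uint32_t Offset, uint32_t Width) {
    return Offset | (Width << 16);
  }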
@@ -663,32 +662,8 @@
 }
 
 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                                            SDValue &Offset) {
-  ConstantSDNode *IMMOffset;
-
-  if (Addr.getOpcode() == ISD::ADD
-      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-      && isInt<16>(IMMOffset->getZExtValue())) {
-
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  // If the pointer address is constant, we can move it to the offset field.
-  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-             && isInt<16>(IMMOffset->getZExtValue())) {
-    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  SDLoc(CurDAG->getEntryNode()),
-                                  AMDGPU::ZERO, MVT::i32);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  }
-
-  // Default case, no offset
-  Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
-  return true;
+                                            SDValue &Offset) {
+  return false;
 }
 
 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
@@ -1956,3 +1931,93 @@
     CurDAG->RemoveDeadNodes();
   } while (IsModified);
 }
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+  unsigned int Opc = N->getOpcode();
+  if (N->isMachineOpcode()) {
+    N->setNodeId(-1);
+    return; // Already selected.
+  }
+
+  switch (Opc) {
+  default: break;
+  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+  case ISD::SCALAR_TO_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    EVT VT = N->getValueType(0);
+    unsigned NumVectorElts = VT.getVectorNumElements();
+    unsigned RegClassID;
+    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+    // that adds a 128 bits reg copy when going through TwoAddressInstructions
+    // pass. We want to avoid 128 bits copies as much as possible because they
+    // can't be bundled by our scheduler.
+    switch(NumVectorElts) {
+    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+    case 4:
+      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+      else
+        RegClassID = AMDGPU::R600_Reg128RegClassID;
+      break;
+    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+    }
+    SelectBuildVector(N, RegClassID);
+    return;
+  }
+  }
+
+  SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *C;
+  SDLoc DL(Addr);
+
+  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  }
+
+  return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *IMMOffset;
+
+  if (Addr.getOpcode() == ISD::ADD
+      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+      && isInt<16>(IMMOffset->getZExtValue())) {
+
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  // If the pointer address is constant, we can move it to the offset field.
+  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+             && isInt<16>(IMMOffset->getZExtValue())) {
+    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                  SDLoc(CurDAG->getEntryNode()),
+                                  AMDGPU::ZERO, MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  }
+
+  // Default case, no offset
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+  return true;
+}
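That completes the AMDGPUISelDAGToDAG.cpp changes. Note that the moved R600DAGToDAGISel::SelectADDRIndirect never rejects an address: every form matches, worst case as the address itself with a zero offset. A recap of its cases, paraphrasing the code above:

  //   (Constant c)                 -> Base = INDIRECT_BASE_ADDR, Offset = c
  //   (DWORDADDR (Constant c))     -> Base = INDIRECT_BASE_ADDR, Offset = c
  //   (add|or Base', (Constant c)) -> Base = Base',              Offset = c
  //   anything else                -> Base = Addr,               Offset = 0

SelectADDRVTX_READ also always returns true, but is stricter about folding: a constant only moves into the offset field when it fits in a signed 16-bit immediate (the isInt<16> checks).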
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -488,6 +488,7 @@
   }
 
   bool addPreISel() override;
+  bool addInstSelector() override;
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
@@ -660,6 +661,11 @@
   return false;
 }
 
+bool R600PassConfig::addInstSelector() {
+  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+  return false;
+}
+
 void R600PassConfig::addPreRegAlloc() {
   addPass(createR600VectorRegMerger());
 }
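With this override in place, an R600 pipeline instantiates R600DAGToDAGISel, while GCN subtargets keep the shared selector through the base hook. For contrast, the base config's hook presumably keeps the shape it had before this patch; it is reproduced here as an assumption, not as part of the diff:

  // Assumed pre-existing base hook, shown for contrast: GCN subtargets
  // keep the generic selector from createAMDGPUISelDag.
  bool AMDGPUPassConfig::addInstSelector() {
    addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
    return false;
  }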