Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2438,7 +2438,8 @@
   MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
   unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
-  if (VAddr) {
+  int Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
+  if (Addr64Opcode == -1) {
     // This is already an ADDR64 instruction so we need to add the pointer
     // extracted from the resource descriptor to the current value of VAddr.
     unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -2462,8 +2463,9 @@
       .addReg(NewVAddrHi)
       .addImm(AMDGPU::sub1);
   } else {
-    // This instructions is the _OFFSET variant, so we need to convert it to
-    // ADDR64.
+
+    // This instruction is the _OFFSET or _IDXEN variant, so we need to
+    // convert it to ADDR64.
     assert(MBB.getParent()->getSubtarget().getGeneration()
            < SISubtarget::VOLCANIC_ISLANDS &&
            "FIXME: Need to emit flat atomics here");
@@ -2471,7 +2473,6 @@
     MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
     MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
     MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
-    unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
 
     // Atomics with return have an additional tied operand and are
     // missing some of the special bits.
@@ -2520,15 +2521,40 @@
         .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
     }
 
-    MI.removeFromParent();
+    const DebugLoc &DL = Addr64->getDebugLoc();
+    if (VAddr) {
+      // This is the _IDXEN variant. Add the 32-bit index to the pointer.
 
-    // NewVaddr = {NewVaddrHi, NewVaddrLo}
-    BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-            NewVAddr)
+      unsigned AddLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      unsigned AddHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+      // NewVaddrLo = SRsrcPtr:sub0 + vaddr
+      BuildMI(MBB, Addr64, DL, get(AMDGPU::V_ADD_I32_e32), AddLo)
+        .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+        .addReg(VAddr->getReg(), 0, VAddr->getSubReg());
+
+      // NewVaddrHi = SRsrcPtr:sub1 + carry
+      BuildMI(MBB, Addr64, DL, get(AMDGPU::V_ADDC_U32_e32), AddHi)
+        .addImm(0)
+        .addReg(SRsrcPtr, 0, AMDGPU::sub1);
+
+      // NewVaddr = {NewVaddrHi, NewVaddrLo}
+      BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+              NewVAddr)
+        .addReg(AddLo)
+        .addImm(AMDGPU::sub0)
+        .addReg(AddHi)
+        .addImm(AMDGPU::sub1);
+    } else {
+      // NewVaddr = {NewVaddrHi, NewVaddrLo}
+      BuildMI(MBB, Addr64, DL, get(AMDGPU::REG_SEQUENCE), NewVAddr)
       .addReg(SRsrcPtr, 0, AMDGPU::sub0)
       .addImm(AMDGPU::sub0)
       .addReg(SRsrcPtr, 0, AMDGPU::sub1)
       .addImm(AMDGPU::sub1);
+    }
+
+    MI.removeFromParent();
 
     VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
     SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
Index: test/CodeGen/AMDGPU/move-to-valu-mubuf-idxen.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/move-to-valu-mubuf-idxen.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: Broken for VI
+
+; GCN-LABEL: {{^}}move_to_valu_buffer_load_dword_idxen:
+; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[VRSRC0:[0-9]+]]:[[VRSRC1:[0-9]+]]{{\]}}
+; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
+; GCN-DAG: s_mov_b64 s{{\[}}[[SRSRC0:[0-9]+]]:{{[0-9]+\]}}, 0{{$}}
+
+; GCN-DAG: v_add_i32_e32 v[[ADD_LO:[0-9]+]], vcc, [[IDX]], v[[VRSRC0]]
+; GCN-DAG: v_addc_u32_e32 v[[ADD_HI:[0-9]+]], vcc, 0, v[[VRSRC1]], vcc
+
+; GCN: buffer_load_dword [[RESULT:v[0-9]+]], v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}, s{{\[}}[[SRSRC0]]:{{[0-9]+\]}}, 0 addr64 offset:124
+; GCN: buffer_store_dword [[RESULT]]
+define void @move_to_valu_buffer_load_dword_idxen(float addrspace(1)* %out, <2 x i32> addrspace(1)* %ptr, i32 %idx) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = zext i32 %tid to i64
+  %gep = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %ptr, i64 %tid.ext
+  %gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %rsrc.ptr = load <2 x i32>, <2 x i32> addrspace(1)* %gep
+  %vgpr.rsrc = shufflevector <2 x i32> %rsrc.ptr, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %load = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %vgpr.rsrc, i32 %idx, i32 124, i1 0, i1 0)
+  store float %load, float addrspace(1)* %gep.out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
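Note (reviewer addendum, not part of the patch): the new _IDXEN path forms the
64-bit address as SRsrcPtr + zext(vaddr) with a two-instruction carry chain.
V_ADD_I32_e32 produces the low 32 bits and writes the carry-out to VCC, and
V_ADDC_U32_e32 folds that carry into the high half (0 + SRsrcPtr:sub1 + VCC).
Below is a minimal C++ sketch of the equivalent scalar arithmetic; the helper
name addr64FromIdxen is hypothetical and exists only for illustration.

#include <cstdint>

// Sketch of what the emitted VALU pair computes. rsrc_base stands for the
// 64-bit pointer extracted from the resource descriptor (SRsrcPtr) and idx
// for the 32-bit vaddr index operand, which is zero-extended.
uint64_t addr64FromIdxen(uint64_t rsrc_base, uint32_t idx) {
  uint32_t base_lo = static_cast<uint32_t>(rsrc_base);
  uint32_t base_hi = static_cast<uint32_t>(rsrc_base >> 32);

  uint32_t lo = base_lo + idx;       // V_ADD_I32_e32: low add, carry-out -> VCC
  uint32_t carry = lo < base_lo;     // the carry bit V_ADDC_U32_e32 consumes
  uint32_t hi = 0 + base_hi + carry; // V_ADDC_U32_e32: 0 + sub1 + carry-in

  // REG_SEQUENCE {lo -> sub0, hi -> sub1} yields the new 64-bit vaddr.
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

This is the pattern the new test checks for: a v_add_i32_e32/v_addc_u32_e32
pair whose result registers v[ADD_LO:ADD_HI] feed the addr64 buffer_load.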