Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -6374,7 +6374,7 @@ /// \brief Adjust the writemask of MIMG instructions void SITargetLowering::adjustWritemask(MachineSDNode *&Node, SelectionDAG &DAG) const { - SDNode *Users[4] = { }; + SDNode *Users[4] = { nullptr }; unsigned Lane = 0; unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3; unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx); @@ -6426,18 +6426,6 @@ Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end()); Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops); - // If we only got one lane, replace it with a copy - // (if NewDmask has only one bit set...) - if (NewDmask && (NewDmask & (NewDmask-1)) == 0) { - SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(), - MVT::i32); - SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - SDLoc(), Users[Lane]->getValueType(0), - SDValue(Node, 0), RC); - DAG.ReplaceAllUsesWith(Users[Lane], Copy); - return; - } - // Update the users of the node with the new indices for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) { SDNode *User = Users[i]; @@ -6606,18 +6594,41 @@ unsigned DmaskIdx = MI.getNumOperands() == 12 ? 3 : 4; unsigned Writemask = MI.getOperand(DmaskIdx).getImm(); unsigned BitsSet = 0; - for (unsigned i = 0; i < 4; ++i) - BitsSet += Writemask & (1 << i) ? 1 : 0; + unsigned SubRegIdx; + for (unsigned i = 0; i < 4; ++i) + BitsSet += Writemask & (1 << i) ? 
1 : 0; switch (BitsSet) { - default: return; - case 1: RC = &AMDGPU::VGPR_32RegClass; break; - case 2: RC = &AMDGPU::VReg_64RegClass; break; - case 3: RC = &AMDGPU::VReg_96RegClass; break; + default: + return; + case 1: + RC = &AMDGPU::VGPR_32RegClass; + SubRegIdx = AMDGPU::sub0; + break; + case 2: + RC = &AMDGPU::VReg_64RegClass; + SubRegIdx = AMDGPU::sub0_sub1; + break; + case 3: + RC = &AMDGPU::VReg_96RegClass; + SubRegIdx = AMDGPU::sub0_sub1_sub2; + break; } + auto InsPt = std::next(MI.getIterator()); + + unsigned TmpSuperReg = MRI.createVirtualRegister(&AMDGPU::VReg_128RegClass); + unsigned TmpReg = MRI.createVirtualRegister(RC); + unsigned NewOpcode = TII->getMaskedMIMGOp(MI.getOpcode(), BitsSet); MI.setDesc(TII->get(NewOpcode)); - MRI.setRegClass(VReg, RC); + MI.getOperand(0).setReg(TmpReg); + + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*MI.getParent(), InsPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), TmpSuperReg); + BuildMI(*MI.getParent(), InsPt, DL, TII->get(AMDGPU::INSERT_SUBREG), VReg) + .addReg(TmpSuperReg) + .addReg(TmpReg) + .addImm(SubRegIdx); return; } Index: test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll @@ -0,0 +1,38 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}adjust_writemask_crash_0: +; GCN: image_get_lod v[0:1], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 +; GCN-NOT: v1 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0 +define amdgpu_ps void @adjust_writemask_crash_0() #0 { +main_body: + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <2 x float> %tmp to <2 x i32> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %tmp3 = bitcast <4 x i32> 
%tmp2 to <4 x float> + %tmp4 = extractelement <4 x float> %tmp3, i32 0 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}adjust_writemask_crash_1: +; GCN: image_get_lod v[0:1], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 +; GCN-NOT: v1 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0 +define amdgpu_ps void @adjust_writemask_crash_1() #0 { +main_body: + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <2 x float> %tmp to <2 x i32> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef> + %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> + %tmp4 = extractelement <4 x float> %tmp3, i32 1 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + +declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly }