Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -109,8 +109,7 @@
   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
   // subtarget has UnpackedD16VMem feature.
   // TODO: remove this when we discard GFX80 encoding.
-  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
-      && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
+  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
     Gen = SIEncodingFamily::GFX80;
 
   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2301,10 +2301,6 @@
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
 
-  // Gather4 instructions do not need validation: dst size is hardcoded.
-  if (Desc.TSFlags & SIInstrFlags::Gather4)
-    return true;
-
   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
   int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
@@ -2319,9 +2315,12 @@
   if (DMask == 0)
     DMask = 1;
 
-  unsigned DataSize = countPopulation(DMask);
-  if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
-    DataSize = (DataSize + 1) / 2;
+  unsigned DataSize =
+    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
+  if (hasPackedD16()) {
+    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
+      DataSize = (DataSize + 1) / 2;
   }
 
   return (VDataSize / 4) == DataSize + TFESize;
@@ -2389,10 +2388,14 @@
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
 
-  if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
-    return true;
-
-  return !isCI() && !isSI();
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
+    if (isCI() || isSI())
+      return false;
+  }
+
+  return true;
 }
 
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
@@ -4261,6 +4264,7 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
 }
 
 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
@@ -4287,6 +4291,10 @@
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultD16() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyD16);
+}
+
 //===----------------------------------------------------------------------===//
 // smrd
 //===----------------------------------------------------------------------===//
@@ -4389,6 +4397,7 @@
   {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
   {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
   {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
+  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
   {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
   {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
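Taken together, the two validation hunks above compute the expected vdata size uniformly for gather4, D16, and TFE instead of special-casing gather4. A minimal standalone sketch of that arithmetic (the free-standing helper and its name are mine, not the parser's):

#include <cstdint>

// Sketch of the check in validateMIMGDataSize() above.
unsigned expectedVDataDwords(unsigned DMask, bool IsGather4, bool D16,
                             bool PackedD16, bool TFE) {
  unsigned DataSize = 0;
  if (IsGather4) {
    DataSize = 4;                         // gather4 always returns 4 channels
  } else {
    DMask &= 0xf;
    if (DMask == 0)
      DMask = 1;                          // dmask==0 still writes one dword
    for (unsigned M = DMask; M; M >>= 1)  // countPopulation(DMask)
      DataSize += M & 1;
  }
  if (D16 && PackedD16)                   // packed D16: two halves per dword
    DataSize = (DataSize + 1) / 2;
  return DataSize + (TFE ? 1 : 0);        // TFE appends one status dword
}

The parser then compares this count against the width of the vdata register operand, as in the `(VDataSize / 4) == DataSize + TFESize` return above. Note that only hasPackedD16() halves the count: on unpacked-D16 targets (gfx8.0) each 16-bit channel still occupies a full 32-bit register.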
@@ -5094,8 +5103,6 @@
     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
   case MCK_glc:
     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
-  case MCK_d16:
-    return Operand.isD16() ? Match_Success : Match_InvalidOperand;
   case MCK_idxen:
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:
Index: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
@@ -720,7 +720,7 @@
   "buffer_store_format_xyzw", VReg_128
 >;
 
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
   defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
     "buffer_load_format_d16_x", VGPR_32
   >;
@@ -747,7 +747,7 @@
   >;
 } // End HasUnpackedD16VMem.
 
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
     "buffer_load_format_d16_x", VGPR_32
   >;
@@ -990,7 +990,7 @@
 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
 
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
   defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
@@ -1001,7 +1001,7 @@
   defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
 } // End HasUnpackedD16VMem.
 
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
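The register classes chosen above encode the packed/unpacked split directly: the gfx8.0 (`_gfx80`, unpacked) variants keep one 32-bit VGPR per 16-bit channel, while the packed variants fit two channels per VGPR. A one-line sketch of the rule (helper name is mine):

// Dword count implied by the register classes above.
unsigned d16FormatDwords(unsigned Channels, bool HasUnpackedD16VMem) {
  return HasUnpackedD16VMem ? Channels            // e.g. d16_xyz -> VReg_96
                            : (Channels + 1) / 2; // e.g. d16_xyz -> VReg_64
}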
Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -289,10 +289,6 @@
 // as if it has 1 dword, which could be not really so.
 DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
 
-  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
-    return MCDisassembler::Success;
-  }
-
   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdst);
 
@@ -304,22 +300,25 @@
   int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::tfe);
+  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                          AMDGPU::OpName::d16);
 
   assert(VDataIdx != -1);
   assert(DMaskIdx != -1);
   assert(TFEIdx != -1);
 
   bool IsAtomic = (VDstIdx != -1);
+  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
 
   unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
   if (DMask == 0)
     return MCDisassembler::Success;
 
-  unsigned DstSize = countPopulation(DMask);
+  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
   if (DstSize == 1)
     return MCDisassembler::Success;
 
-  bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
+  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
   if (D16 && AMDGPU::hasPackedD16(STI)) {
     DstSize = (DstSize + 1) / 2;
   }
@@ -335,6 +334,11 @@
       NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
     }
     if (NewOpcode == -1) return MCDisassembler::Success;
+  } else if (IsGather4) {
+    if (D16 && AMDGPU::hasPackedD16(STI))
+      NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
+    else
+      return MCDisassembler::Success;
   } else {
     NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
     assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -84,6 +84,8 @@
                 raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo,
                 const MCSubtargetInfo &STI, raw_ostream &O);
+  void printD16(const MCInst *MI, unsigned OpNo,
+                const MCSubtargetInfo &STI, raw_ostream &O);
   void printExpCompr(const MCInst *MI, unsigned OpNo,
                      const MCSubtargetInfo &STI, raw_ostream &O);
   void printExpVM(const MCInst *MI, unsigned OpNo,
Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -217,6 +217,11 @@
   printNamedBit(MI, OpNo, O, "lwe");
 }
 
+void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI,
+                                 raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "d16");
+}
+
 void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {
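The disassembler hunk above now retargets gather4 opcodes as well. Its decision tree, reduced to pure logic (a sketch; the real code calls the AMDGPU::getMasked* / getMIMGGatherOpPackedD16 lookup helpers instead of returning an enum):

// Sketch of the opcode-retargeting decision in convertMIMGInst() above.
enum class Retarget { KeepDefault, MaskedOp, MaskedAtomicOp, GatherPackedD16 };

Retarget classifyMIMG(bool IsAtomic, bool IsGather4, bool D16, bool PackedD16,
                      unsigned DstSize) {
  if (DstSize == 1)
    return Retarget::KeepDefault;           // default decoding already fits
  if (IsAtomic)
    return Retarget::MaskedAtomicOp;        // getMaskedMIMGAtomicOp()
  if (IsGather4)                            // gather4 is fixed at 4 channels;
    return (D16 && PackedD16)               // only packed D16 shrinks vdata
               ? Retarget::GatherPackedD16  // getMIMGGatherOpPackedD16()
               : Retarget::KeepDefault;
  return Retarget::MaskedOp;                // getMaskedMIMGOp()
}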
Index: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
@@ -17,6 +17,11 @@
   int AtomicSize = !if(is32Bit, 1, 2);
 }
 
+class MIMG_Gather_Size <string op, int channels> {
+  string Op = op;
+  int Channels = channels;
+}
+
 class mimg <bits<7> si, bits<7> vi = si> {
   field bits<7> SI = si;
   field bits<7> VI = vi;
@@ -37,125 +42,88 @@
 class MIMG_NoSampler_Helper <bits<7> op, string asm,
                              RegisterClass dst_rc,
                              RegisterClass addr_rc,
-                             bit d16_bit=0,
-                             string dns=""> : MIMG_Helper <
-  (outs dst_rc:$vdata),
-  (ins addr_rc:$vaddr, SReg_256:$srsrc,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  dns>, MIMGe <op> {
+                             bit has_d16,
+                             string dns="">
+  : MIMG_Helper <(outs dst_rc:$vdata),
+                 !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                    #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe <op> {
   let ssamp = 0;
-  let D16 = d16;
-}
 
-multiclass MIMG_NoSampler_Src_Helper_Helper <bits<7> op, string asm,
-                                             RegisterClass dst_rc,
-                                             int channels, bit d16_bit,
-                                             string suffix> {
-  def NAME # _V1 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, d16_bit,
-                              !if(!eq(channels, 1), "AMDGPU", "")>,
-                            MIMG_Mask <asm#"_V1"#suffix, channels>;
-  def NAME # _V2 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, d16_bit>,
-                            MIMG_Mask <asm#"_V2"#suffix, channels>;
-  def NAME # _V3 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, d16_bit>,
-                            MIMG_Mask <asm#"_V3"#suffix, channels>;
-  def NAME # _V4 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, d16_bit>,
-                            MIMG_Mask <asm#"_V4"#suffix, channels>;
+
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
-                                      RegisterClass dst_rc,
-                                      int channels> {
-  defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm> {
-  defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
-  defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
-  defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
-  defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckNoSampler <bits<7> op, string asm> {
-  defm NAME # _V1 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
-  defm NAME # _V2 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
-  defm NAME # _V3 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
-  defm NAME # _V4 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+                                      RegisterClass dst_rc,
+                                      int channels, bit has_d16> {
+  def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, has_d16,
+                     !if(!eq(channels, 1), "AMDGPU", "")>,
+                   MIMG_Mask <asm#"_V1", channels>;
+  def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, has_d16>,
+                   MIMG_Mask <asm#"_V2", channels>;
+  def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, has_d16>,
+                   MIMG_Mask <asm#"_V3", channels>;
+  def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, has_d16>,
+                   MIMG_Mask <asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16> {
+  defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1, has_d16>;
+  defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2, has_d16>;
+  defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3, has_d16>;
+  defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4, has_d16>;
 }
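Because `has_d16` now adds or omits the trailing `D16:$d16` ins operand via `!con`/`!if`, consumers can no longer assume the operand exists; every C++ hunk in this patch therefore probes for it by name before reading it. The idiom, as a standalone sketch (it uses the real AMDGPU::getNamedOperandIdx from AMDGPUBaseInfo.h and the post-patch AMDGPU::OpName::d16):

#include "llvm/MC/MCInst.h"

static bool hasD16Set(const llvm::MCInst &Inst, unsigned Opcode) {
  // d16 exists only on instructions built with has_d16 = 1.
  int D16Idx = llvm::AMDGPU::getNamedOperandIdx(Opcode,
                                                llvm::AMDGPU::OpName::d16);
  return D16Idx >= 0 && Inst.getOperand(D16Idx).getImm() != 0;
}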
 class MIMG_Store_Helper <bits<7> op, string asm,
                          RegisterClass data_rc,
                          RegisterClass addr_rc,
-                         bit d16_bit=0,
-                         string dns = ""> : MIMG_Helper <
-  (outs),
-  (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe <op> {
+                         bit has_d16,
+                         string dns = "">
+  : MIMG_Helper <(outs),
+                 !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                    #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe <op> {
   let ssamp = 0;
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
   let hasPostISelHook = 0;
   let DisableWQM = 1;
-  let D16 = d16;
-}
 
-multiclass MIMG_Store_Addr_Helper_Helper <bits<7> op, string asm,
-                                          RegisterClass data_rc,
-                                          int channels, bit d16_bit,
-                                          string suffix> {
-  def NAME # _V1 # suffix : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, d16_bit,
-                              !if(!eq(channels, 1), "AMDGPU", "")>,
-                            MIMG_Mask <asm#"_V1"#suffix, channels>;
-  def NAME # _V2 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_64, d16_bit>,
-                            MIMG_Mask <asm#"_V2"#suffix, channels>;
-  def NAME # _V3 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_96, d16_bit>,
-                            MIMG_Mask <asm#"_V3"#suffix, channels>;
-  def NAME # _V4 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_128, d16_bit>,
-                            MIMG_Mask <asm#"_V4"#suffix, channels>;
+
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
                                    RegisterClass data_rc,
-                                   int channels> {
-  defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_Store <bits<7> op, string asm> {
-  defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1>;
-  defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2>;
-  defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3>;
-  defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckStore <bits<7> op, string asm> {
-  defm NAME # _V1 : MIMG_Store_Addr_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
-  defm NAME # _V2 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
-  defm NAME # _V3 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
-  defm NAME # _V4 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+                                   int channels, bit has_d16> {
+  def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, has_d16,
+                     !if(!eq(channels, 1), "AMDGPU", "")>,
+                   MIMG_Mask <asm#"_V1", channels>;
+  def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64, has_d16>,
+                   MIMG_Mask <asm#"_V2", channels>;
+  def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96, has_d16>,
+                   MIMG_Mask <asm#"_V3", channels>;
+  def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128, has_d16>,
+                   MIMG_Mask <asm#"_V4", channels>;
+}
+
+multiclass MIMG_Store <bits<7> op, string asm, bit has_d16> {
+  defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1, has_d16>;
+  defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2, has_d16>;
+  defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3, has_d16>;
+  defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4, has_d16>;
 }
 
 class MIMG_Atomic_Real_si <mimg op, string name, string asm,
-                           RegisterClass data_rc, RegisterClass addr_rc,
-                           bit enableDasm> :
-  MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
-  SIMCInstr <name, SIEncodingFamily.SI>,
-  MIMGe <op.SI> {
+                           RegisterClass data_rc, RegisterClass addr_rc,
+                           bit enableDasm>
+  : MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
+    SIMCInstr <name, SIEncodingFamily.SI>,
+    MIMGe <op.SI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isSICI];
   let DisableDecoder = DisableSIDecoder;
+  let d16 = 0;
 }
 
 class MIMG_Atomic_Real_vi <mimg op, string name, string asm,
-                           RegisterClass data_rc, RegisterClass addr_rc,
-                           bit enableDasm> :
-  MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
-  SIMCInstr <name, SIEncodingFamily.VI>,
-  MIMGe <op.VI> {
+                           RegisterClass data_rc, RegisterClass addr_rc,
+                           bit enableDasm>
+  : MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
+    SIMCInstr <name, SIEncodingFamily.VI>,
+    MIMGe <op.VI> {
  let isCodeGenOnly = 0;
   let AssemblerPredicates = [isVI];
   let DisableDecoder = DisableVIDecoder;
+  let d16 = 0;
 }
 
 class MIMG_Sampler_Helper <bits<7> op, string asm,
                            RegisterClass dst_rc,
                            RegisterClass src_rc,
-                           bit wqm,
-                           bit d16_bit=0,
-                           string dns=""> : MIMG_Helper <
-  (outs dst_rc:$vdata),
-  (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  dns>, MIMGe <op> {
+                           bit wqm, bit has_d16,
+                           string dns="">
+  : MIMG_Helper <(outs dst_rc:$vdata),
+                 !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                    #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe <op> {
   let WQM = wqm;
-  let D16 = d16;
-}
 
-multiclass MIMG_Sampler_Src_Helper_Helper <bits<7> op, string asm,
-                                           RegisterClass dst_rc,
-                                           int channels, bit wqm,
-                                           bit d16_bit, string suffix> {
-  def _V1 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V1"#suffix, channels>;
-  def _V2 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V2"#suffix, channels>;
-  def _V3 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V3"#suffix, channels>;
-  def _V4 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V4"#suffix, channels>;
-  def _V8 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V8"#suffix, channels>;
-  def _V16 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>,
-                      MIMG_Mask <asm#"_V16"#suffix, channels>;
+
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
                                     RegisterClass dst_rc,
-                                    int channels, bit wqm> {
-  defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> {
-  defm _V1 : MIMG_Sampler_Src_Helper <op, asm, VGPR_32, 1, wqm>;
-  defm _V2 : MIMG_Sampler_Src_Helper <op, asm, VReg_64, 2, wqm>;
-  defm _V3 : MIMG_Sampler_Src_Helper <op, asm, VReg_96, 3, wqm>;
-  defm _V4 : MIMG_Sampler_Src_Helper <op, asm, VReg_128, 4, wqm>;
+                                    int channels, bit wqm, bit has_d16> {
+  def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, has_d16,
+              !if(!eq(channels, 1), "AMDGPU", "")>,
+            MIMG_Mask <asm#"_V1", channels>;
+  def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, has_d16>,
+            MIMG_Mask <asm#"_V2", channels>;
+  def _V3 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, has_d16>,
+            MIMG_Mask <asm#"_V3", channels>;
+  def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, has_d16>,
+            MIMG_Mask <asm#"_V4", channels>;
+  def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, has_d16>,
+            MIMG_Mask <asm#"_V8", channels>;
+  def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, has_d16>,
+             MIMG_Mask <asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm = 0, bit has_d16 = 1> {
+  defm _V1 : MIMG_Sampler_Src_Helper <op, asm, VGPR_32, 1, wqm, has_d16>;
+  defm _V2 : MIMG_Sampler_Src_Helper <op, asm, VReg_64, 2, wqm, has_d16>;
+  defm _V3 : MIMG_Sampler_Src_Helper <op, asm, VReg_96, 3, wqm, has_d16>;
+  defm _V4 : MIMG_Sampler_Src_Helper <op, asm, VReg_128, 4, wqm, has_d16>;
 }
 
 multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler <op, asm, 1>;
@@ -306,14 +265,14 @@
 class MIMG_Gather_Helper <bits<7> op, string asm,
                           RegisterClass dst_rc,
                           RegisterClass src_rc,
                           bit wqm,
-                          bit d16_bit=0,
-                          string dns=""> : MIMG <
-  (outs dst_rc:$vdata),
-  (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  []>, MIMGe <op> {
+                          string dns="">
+  : MIMG <(outs dst_rc:$vdata),
+          (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+               DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+               R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da, D16:$d16),
+          asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da$d16",
+          []>,
+    MIMGe <op> {
   let mayLoad = 1;
   let mayStore = 0;
@@ -327,7 +286,7 @@
   let Gather4 = 1;
   let hasPostISelHook = 0;
   let WQM = wqm;
-  let D16 = d16;
+  let HasD16 = 1;
 
   let DecoderNamespace = dns;
   let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
@@ -336,29 +295,25 @@
 multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
                                    RegisterClass dst_rc,
-                                   bit wqm, bit d16_bit,
-                                   string prefix,
-                                   string suffix> {
-  def prefix # _V1 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit>;
-  def prefix # _V2 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>;
-  def prefix # _V3 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>;
-  def prefix # _V4 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>;
-  def prefix # _V8 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>;
-  def prefix # _V16 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>;
+                                   int channels, bit wqm> {
+  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
+            MIMG_Gather_Size <asm#"_V1", channels>;
+  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
+            MIMG_Gather_Size <asm#"_V2", channels>;
+  def _V3 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm>,
+            MIMG_Gather_Size <asm#"_V3", channels>;
+  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
+            MIMG_Gather_Size <asm#"_V4", channels>;
+  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
+            MIMG_Gather_Size <asm#"_V8", channels>;
+  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
+             MIMG_Gather_Size <asm#"_V16", channels>;
 }
 multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> {
-  defm "" : MIMG_Gather_Src_Helper <op, asm, VReg_128, wqm, 0, "", "">;
-
-  let d16 = 1 in {
-    let AssemblerPredicate = HasPackedD16VMem in {
-      defm "" : MIMG_Gather_Src_Helper <op, asm, VReg_128, wqm, 1, "", "_D16">;
-    } // End HasPackedD16VMem.
-
-    let AssemblerPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm "" : MIMG_Gather_Src_Helper <op, asm, VReg_128, wqm, 1, "", "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
+  defm _V2 : MIMG_Gather_Src_Helper <op, asm, VReg_64, 2, wqm>; /* for packed D16 only */
+  defm _V4 : MIMG_Gather_Src_Helper <op, asm, VReg_128, 4, wqm>;
 }
 
 multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather <op, asm, 1>;
@@ -367,19 +322,19 @@
 // MIMG Instructions
 //===----------------------------------------------------------------------===//
 let SubtargetPredicate = isGCN in {
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
-defm IMAGE_LOAD_PCK : MIMG_PckNoSampler <0x00000002, "image_load_pck">;
-defm IMAGE_LOAD_PCK_SGN : MIMG_PckNoSampler <0x00000003, "image_load_pck_sgn">;
-defm IMAGE_LOAD_MIP_PCK : MIMG_PckNoSampler <0x00000004, "image_load_mip_pck">;
-defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_PckNoSampler <0x00000005, "image_load_mip_pck_sgn">;
-defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
-defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
-defm IMAGE_STORE_PCK : MIMG_PckStore <0x0000000a, "image_store_pck">;
-defm IMAGE_STORE_MIP_PCK : MIMG_PckStore <0x0000000b, "image_store_mip_pck">;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load", 1>;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip", 1>;
+defm IMAGE_LOAD_PCK : MIMG_NoSampler <0x00000002, "image_load_pck", 0>;
+defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <0x00000003, "image_load_pck_sgn", 0>;
+defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <0x00000004, "image_load_mip_pck", 0>;
+defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <0x00000005, "image_load_mip_pck_sgn", 0>;
+defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store", 1>;
+defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip", 1>;
+defm IMAGE_STORE_PCK : MIMG_Store <0x0000000a, "image_store_pck", 0>;
+defm IMAGE_STORE_MIP_PCK : MIMG_Store <0x0000000b, "image_store_mip_pck", 0>;
 
 let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo", 0>;
 }
 
 defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
@@ -457,7 +412,7 @@
 defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
 
 let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod", 1, 0>;
 }
 
 defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
@@ -519,13 +474,13 @@
 }
 
 class ImageDimPattern <AMDGPUImageDimIntrinsic I,
-                       string dop, ValueType dty,
-                       string suffix = ""> : GCNPat <(undef), (undef)> {
+                       string dop, ValueType dty, bit d16 = 0,
+                       string suffix = ""> : GCNPat <(undef), (undef)> {
   list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
   getDwordsType<!size(AddrArgs)> AddrDwords = getDwordsType<!size(AddrArgs)>;
 
-  Instruction MI =
-    !cast<Instruction>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
+  MIMG MI =
+    !cast<MIMG>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
 
   // DAG fragment to match data arguments (vdata for store/atomic, dmask
   // for non-atomic).
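Stepping back to the MIMG_Gather definitions above: a gather4 result is always four channels, so the only narrow variant that has to exist is _V2, for packed D16. The size rule, as a sketch:

// vdata register count for gather4 after this change.
unsigned gatherVDataDwords(bool D16, bool PackedD16) {
  return (D16 && PackedD16) ? 2 : 4; // 4 half-channels packed into 2 dwords
}

This is also why MIMG_Gather_Size records Channels 2 and 4: the disassembler's packed-D16 mapping walks from the 4-channel row to the 2-channel column.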
@@ -581,7 +536,8 @@
             0, /* r128 */
             0, /* tfe */
             0 /*(as_i1imm $lwe)*/,
-            { I.P.Dim.DA }));
+            { I.P.Dim.DA }),
+         !if(MI.HasD16, (MI d16), (MI)));
 
   let ResultInstrs = [
     !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
   ];
@@ -589,23 +545,23 @@
 
 foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
                            AMDGPUImageDimGetResInfoIntrinsics) in {
-  def intr#_pat_v1 : ImageDimPattern <intr, "_V1", f32>;
-  def intr#_pat_v2 : ImageDimPattern <intr, "_V2", v2f32>;
-  def intr#_pat_v4 : ImageDimPattern <intr, "_V4", v4f32>;
+  def intr#_pat_v1 : ImageDimPattern <intr, "_V1", f32, 0>;
+  def intr#_pat_v2 : ImageDimPattern <intr, "_V2", v2f32, 0>;
+  def intr#_pat_v4 : ImageDimPattern <intr, "_V4", v4f32, 0>;
 }
 
 multiclass ImageDimD16Helper <AMDGPUImageDimIntrinsic I,
                               AMDGPUImageDimIntrinsic d16helper> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    def _unpacked_v1 : ImageDimPattern <d16helper, "_V1", i32, "_D16_gfx80">;
-    def _unpacked_v2 : ImageDimPattern <d16helper, "_V2", v2i32, "_D16_gfx80">;
-    def _unpacked_v4 : ImageDimPattern <d16helper, "_V4", v4i32, "_D16_gfx80">;
+    def _unpacked_v1 : ImageDimPattern <d16helper, "_V1", i32, 1>;
+    def _unpacked_v2 : ImageDimPattern <d16helper, "_V2", v2i32, 1>;
+    def _unpacked_v4 : ImageDimPattern <d16helper, "_V4", v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    def _packed_v1 : ImageDimPattern <I, "_V1", f16, "_D16">;
-    def _packed_v2 : ImageDimPattern <I, "_V1", v2f16, "_D16">;
-    def _packed_v4 : ImageDimPattern <I, "_V2", v4f16, "_D16">;
+    def _packed_v1 : ImageDimPattern <I, "_V1", f16, 1>;
+    def _packed_v2 : ImageDimPattern <I, "_V1", v2f16, 1>;
+    def _packed_v4 : ImageDimPattern <I, "_V2", v4f16, 1>;
   } // End HasPackedD16VMem.
 }
@@ -627,7 +583,7 @@
 }
 
 foreach intr = AMDGPUImageDimGatherIntrinsics in {
-  def intr#_pat3 : ImageDimPattern <intr, "_V4", v4f32>;
+  def intr#_pat3 : ImageDimPattern <intr, "_V4", v4f32, 0>;
 
   def intr#_d16helper_profile : AMDGPUDimProfileCopy <intr.P> {
     let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
@@ -643,16 +599,16 @@
     def intr#_unpacked_v4 : ImageDimPattern <
         !cast<AMDGPUImageDimIntrinsic>(
             "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
-        "_V4", v4i32, "_D16_gfx80">;
+        "_V4", v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    def intr#_packed_v4 : ImageDimPattern <intr, "_V2", v4f16, "_D16">;
+    def intr#_packed_v4 : ImageDimPattern <intr, "_V2", v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 foreach intr = AMDGPUImageDimAtomicIntrinsics in {
-  def intr#_pat1 : ImageDimPattern <intr, "_V1", i32>;
+  def intr#_pat1 : ImageDimPattern <intr, "_V1", i32, 0>;
 }
 
 /********** ======================= **********/
@@ -663,154 +619,160 @@
 // TODO:
 // 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
 // 2. Add A16 support when we pass address of half type.
-multiclass ImageSamplePattern <SDPatternOperator name, MIMG opcode,
-                               ValueType dt, ValueType vt> {
+multiclass ImageSamplePattern <SDPatternOperator name, MIMG opcode,
+                               ValueType dt, ValueType vt, bit d16> {
   def : GCNPat<
     (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm,
         i1:$glc, i1:$slc, i1:$lwe, i1:$da)),
-    (opcode $addr, $rsrc, $sampler,
-        (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
-        0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $addr, $rsrc, $sampler, (as_i32imm $dmask), (as_i1imm $unorm),
+          (as_i1imm $glc), (as_i1imm $slc), 0, 0, (as_i1imm $lwe),
+          (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
   >;
 }
 
-multiclass ImageSampleDataPatterns <SDPatternOperator name, string opcode,
-                                    ValueType dt, string suffix = ""> {
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V1 # suffix), dt, f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V8 # suffix), dt, v8f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V16 # suffix), dt, v16f32>;
+multiclass ImageSampleDataPatterns <SDPatternOperator name, string opcode,
+                                    ValueType dt, bit d16 = 0> {
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V1), dt, f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V2), dt, v2f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V4), dt, v4f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V8), dt, v8f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V16), dt, v16f32, d16>;
 }
 
 // ImageSample patterns.
 multiclass ImageSamplePatterns <SDPatternOperator name, string opcode> {
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
  } // End HasPackedD16VMem.
 }
 
 // ImageSample alternative patterns for illegal vector half Types.
 multiclass ImageSampleAltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
 }
 
 // ImageGather4 patterns.
 multiclass ImageGather4Patterns <SDPatternOperator name, string opcode> {
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 // ImageGather4 alternative patterns for illegal vector half Types.
 multiclass ImageGather4AltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
-
 }
 
 // ImageLoad for amdgcn.
-multiclass ImageLoadPattern <SDPatternOperator name, MIMG opcode,
-                             ValueType dt, ValueType vt> {
+multiclass ImageLoadPattern <SDPatternOperator name, MIMG opcode,
+                             ValueType dt, ValueType vt, bit d16> {
   def : GCNPat <
     (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
               i1:$da)),
-    (opcode $addr, $rsrc,
-        (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-        0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+          (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
  >;
 }
 
-multiclass ImageLoadDataPatterns <SDPatternOperator name, string opcode,
-                                  ValueType dt, string suffix = ""> {
-  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
-  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
-  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageLoadDataPatterns <SDPatternOperator name, string opcode,
+                                  ValueType dt, bit d16 = 0> {
+  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
 }
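The `!con(..., !if(opcode.HasD16, (opcode d16), (opcode)))` idiom in these patterns splices one extra result operand into the selected instruction only when that instruction declares a d16 operand. Its C++ analogue, as it would look when assembling a node's operand list by hand (a sketch under that assumption, not code from this patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Append the d16 immediate only for instructions that declare the operand;
// mirrors the !con/!if splice in the patterns above.
static void appendOptionalD16(SelectionDAG &DAG, const SDLoc &DL,
                              SmallVectorImpl<SDValue> &Ops, bool HasD16,
                              bool D16) {
  if (HasD16)
    Ops.push_back(DAG.getTargetConstant(D16 ? 1 : 0, DL, MVT::i1));
}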
 // ImageLoad patterns.
 // TODO: support v3f32.
 multiclass ImageLoadPatterns <SDPatternOperator name, string opcode> {
-  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
  } // End HasPackedD16VMem.
 }
 
 // ImageLoad alternative patterns for illegal vector half Types.
 multiclass ImageLoadAltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnPackedD16VMem.
 }
 
 // ImageStore for amdgcn.
-multiclass ImageStorePattern <SDPatternOperator name, MIMG opcode,
-                              ValueType dt, ValueType vt> {
+multiclass ImageStorePattern <SDPatternOperator name, MIMG opcode,
+                              ValueType dt, ValueType vt, bit d16> {
   def : GCNPat <
     (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
           i1:$lwe, i1:$da),
-    (opcode $data, $addr, $rsrc,
-        (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-        0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+          (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
  >;
 }
 
-multiclass ImageStoreDataPatterns <SDPatternOperator name, string opcode,
-                                   ValueType dt, string suffix = ""> {
-  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
-  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
-  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageStoreDataPatterns <SDPatternOperator name, string opcode,
+                                   ValueType dt, bit d16 = 0> {
+  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
 }
 
 // ImageStore patterns.
 // TODO: support v3f32.
 multiclass ImageStorePatterns <SDPatternOperator name, string opcode> {
-  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
  } // End HasPackedD16VMem.
 }
 
 // ImageStore alternative patterns.
 multiclass ImageStoreAltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), i32, "_D16">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), i32, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
   } // End HasPackedD16VMem.
 }
@@ -1030,83 +992,3 @@
 defm : ImageAtomicPatterns <int_amdgcn_image_atomic_xor, "XOR">;
 defm : ImageAtomicPatterns <int_amdgcn_image_atomic_inc, "INC">;
 defm : ImageAtomicPatterns <int_amdgcn_image_atomic_dec, "DEC">;
-
-/* SIsample for simple 1D texture lookup */
-def : GCNPat <
-  (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
-  (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SamplePattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleRectPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
-  (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleArrayPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-class SampleShadowPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleShadowArrayPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-/* SIsample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns <MIMG sample, MIMG sample_l, MIMG sample_b,
-                           MIMG sample_d, ValueType addr_type> {
-  def : SamplePattern <SIsample, sample, addr_type>;
-  def : SampleRectPattern <SIsample, sample, addr_type>;
-  def : SampleArrayPattern <SIsample, sample, addr_type>;
-  def : SampleShadowPattern <SIsample, sample, addr_type>;
-  def : SampleShadowArrayPattern <SIsample, sample, addr_type>;
-
-  def : SamplePattern <SIsamplel, sample_l, addr_type>;
-  def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
-  def : SampleShadowPattern <SIsamplel, sample_l, addr_type>;
-  def : SampleShadowArrayPattern <SIsamplel, sample_l, addr_type>;
-
-  def : SamplePattern <SIsampleb, sample_b, addr_type>;
-  def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
-  def : SampleShadowPattern <SIsampleb, sample_b, addr_type>;
-  def : SampleShadowArrayPattern <SIsampleb, sample_b, addr_type>;
-
-  def : SamplePattern <SIsampled, sample_d, addr_type>;
-  def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
-  def : SampleShadowPattern <SIsampled, sample_d, addr_type>;
-  def : SampleShadowArrayPattern <SIsampled, sample_d, addr_type>;
-}
-
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_D_V4_V2, v2i32>;
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_D_V4_V4, v4i32>;
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_D_V4_V8, v8i32>;
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_D_V4_V16, v16i32>;
Index: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h
@@ -87,8 +87,8 @@
   // Is a packed VOP3P instruction.
   IsPacked = UINT64_C(1) << 49,
 
-  // "d16" bit set or not.
-  D16 = UINT64_C(1) << 50
+  // Is a D16 buffer instruction.
+  D16Buf = UINT64_C(1) << 50
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
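With bit 50 repurposed, D16-ness of MIMG instructions is no longer a TSFlags property at all; only buffer instructions keep a flag, and only because their gfx8.0 unpacked encodings live in a separate encoding family. How the bit is meant to be read, as a sketch (it mirrors the use in AMDGPUInstrInfo at the top of this patch, assuming the SIInstrFlags enum above):

static bool needsGFX80Encoding(uint64_t TSFlags, bool HasUnpackedD16VMem) {
  // TSFlags bit 50 now answers "is this a D16 *buffer* instruction?" only;
  // MIMG D16 is carried by the explicit d16 operand instead.
  return HasUnpackedD16VMem && (TSFlags & SIInstrFlags::D16Buf);
}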
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7762,9 +7762,16 @@
 /// Adjust the writemask of MIMG instructions
 SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
                                           SelectionDAG &DAG) const {
+  unsigned Opcode = Node->getMachineOpcode();
+
+  // Subtract 1 because the vdata output is not a MachineSDNode operand.
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
+  if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
+    return Node; // not implemented for D16
+
   SDNode *Users[4] = { nullptr };
   unsigned Lane = 0;
-  unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
+  unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
   unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
   unsigned NewDmask = 0;
   bool HasChain = Node->getNumValues() > 1;
@@ -7936,7 +7943,7 @@
   unsigned Opcode = Node->getMachineOpcode();
   if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
-      !TII->isGather4(Opcode) && !TII->isD16(Opcode)) {
+      !TII->isGather4(Opcode)) {
     return adjustWritemask(Node, DAG);
   }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
@@ -118,8 +118,8 @@
   // This bit indicates that this is a packed VOP3P instruction
   field bit IsPacked = 0;
 
-  // This bit indicates that this is a D16 instruction.
-  field bit D16 = 0;
+  // This bit indicates that this is a D16 buffer instruction.
+  field bit D16Buf = 0;
 
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
@@ -176,7 +176,7 @@
 
   let TSFlags{49} = IsPacked;
 
-  let TSFlags{50} = D16;
+  let TSFlags{50} = D16Buf;
 
   let SchedRW = [Write32Bit];
 
@@ -255,7 +255,7 @@
   bits<1> tfe;
   bits<1> lwe;
   bits<1> slc;
-  bits<1> d16 = 0;
+  bit d16;
   bits<8> vaddr;
   bits<7> srsrc;
   bits<7> ssamp;
@@ -344,4 +344,6 @@
 
   let UseNamedOperandTable = 1;
   let hasSideEffects = 0; // XXX ????
+
+  bit HasD16 = 0;
 }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
@@ -445,14 +445,6 @@
     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
   }
 
-  static bool isD16(const MachineInstr &MI) {
-    return MI.getDesc().TSFlags & SIInstrFlags::D16;
-  }
-
-  bool isD16(uint16_t Opcode) const {
-    return get(Opcode).TSFlags & SIInstrFlags::D16;
-  }
-
   static bool isFLAT(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
   }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -300,16 +300,6 @@
 def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">;
 def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">;
 
-class SDSample <string opcode> : SDNode <opcode,
-  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
-                       SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
->;
-
-def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
-def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
-def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
-def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
-
 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
   SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
 >;
@@ -2079,6 +2069,14 @@
   let ValueCols = [["1"]];
 }
 
+def getMIMGGatherOpPackedD16 : InstrMapping {
+  let FilterClass = "MIMG_Gather_Size";
+  let RowFields = ["Op"];
+  let ColFields = ["Channels"];
+  let KeyCol = ["4"];
+  let ValueCols = [["2"]];
+}
+
 // Maps an commuted opcode to its original version
 def getCommuteOrig : InstrMapping {
   let FilterClass = "Commutable_REV";
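The new InstrMapping emits a TableGen-generated table keyed by the MIMG_Gather_Size rows defined earlier: for each gather op it maps the 4-channel variant (KeyCol "4") to its 2-channel sibling (ValueCol "2"). getMIMGGatherOpPackedD16, declared in AMDGPUBaseInfo.h below, is the accessor. Hypothetical usage, mirroring the disassembler hunk earlier in this patch:

#include "llvm/MC/MCInst.h"

// Retarget a _V4 gather opcode to its _V2 sibling for packed D16.
static void shrinkPackedD16Gather(llvm::MCInst &MI) {
  int NewOpcode = llvm::AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
  if (NewOpcode != -1) // -1: no packed-D16 sibling for this opcode
    MI.setOpcode(NewOpcode);
}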
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -166,6 +166,9 @@
                       unsigned Opc, unsigned NewChannels);
 
 LLVM_READONLY
+int getMIMGGatherOpPackedD16(uint16_t Opcode);
+
+LLVM_READONLY
 int getMCOpcode(uint16_t Opcode, unsigned Gen);
 
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
@@ -61,7 +61,7 @@
     %11.sub6 = COPY %1
     %11.sub7 = COPY %1
     %11.sub8 = COPY %1
-    dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+    dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
     %20.sub1 = COPY %2
     %20.sub2 = COPY %2
     %20.sub3 = COPY %2
@@ -70,6 +70,6 @@
     %20.sub6 = COPY %2
     %20.sub7 = COPY %2
     %20.sub8 = COPY %2
-    dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+    dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
 
 ...
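All MIR test updates in this patch follow one mechanical rule: each MIMG sample/store instruction gains a trailing immediate `0` for the new d16 operand, just before `implicit $exec`. When building such an instruction by hand it must now be supplied explicitly; a sketch (MBB, I, DL, TII, and the register names are assumed to be in scope):

#include "llvm/CodeGen/MachineInstrBuilder.h"

// Hand-built equivalent of one of the updated MIR lines below.
BuildMI(MBB, I, DL, TII->get(llvm::AMDGPU::IMAGE_SAMPLE_LZ_V4_V2), DstReg)
    .addReg(VAddr)
    .addReg(SRsrc)
    .addReg(SSamp)
    .addImm(15)                               // dmask
    .addImm(0).addImm(0).addImm(0).addImm(0)  // unorm, glc, slc, r128
    .addImm(0).addImm(0).addImm(0)            // tfe, lwe, da
    .addImm(0);                               // d16 -- the new operand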
Index: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
@@ -305,11 +305,11 @@
 
 # GCN-LABEL: {{^}}name: image_clause{{$}}
 # GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sreg_128, %1, implicit $exec {
-# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
-# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
 ---
 name: image_clause
@@ -325,17 +325,17 @@
   bb.0:
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
-    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: mixed_clause{{$}}
 # GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
-# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
 # GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
@@ -355,7 +355,7 @@
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
     %2 = IMPLICIT_DEF
-    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
     %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
 ...
Index: llvm/trunk/test/MC/AMDGPU/mimg.s
===================================================================
--- llvm/trunk/test/MC/AMDGPU/mimg.s
+++ llvm/trunk/test/MC/AMDGPU/mimg.s
@@ -356,20 +356,19 @@
 // GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00]
 
 image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: d16 modifier is not supported on this GPU
 // GFX8_0: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// NOGFX8_1: error: instruction not supported on this GPU
-// NOGFX9: error: instruction not supported on this GPU
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9: error: image data size does not match dmask and tfe
 
 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16
 // NOSICI: error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
+// NOGFX8_0: error: image data size does not match dmask and tfe
 // GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
 // GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
 
-// FIXME: d16 is handled as an optional modifier, should it be corrected?
 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
-// NOSICI: error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
-// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
+// NOSICI: error: image data size does not match dmask and tfe
+// NOGFX8_0: error: image data size does not match dmask and tfe
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9: error: image data size does not match dmask and tfe