diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -744,8 +744,8 @@
   list<int> AllNumAddrWords =
     !foreach(dw, !if(sample.Gradients,
                      !if(!eq(sample.LodOrClamp, ""),
-                         [2, 3, 4, 5, 6, 7, 9],
-                         [2, 3, 4, 5, 7, 8, 10]),
+                         [2, 3, 4, 5, 6, 7, 8, 9],
+                         [2, 3, 4, 5, 6, 7, 8, 9, 10]),
                      !if(!eq(sample.LodOrClamp, ""),
                          [1, 2, 3],
                          [1, 2, 3, 4])),
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6085,56 +6085,73 @@
   MVT VAddrVT =
       Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
   MVT VAddrScalarVT = VAddrVT.getScalarType();
-  MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
+  MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
   IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
 
   VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();
   VAddrScalarVT = VAddrVT.getScalarType();
+  MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
   IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
-  if (IsA16 || IsG16) {
-    if (IsA16) {
-      if (!ST->hasA16()) {
-        LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
-                             "support 16 bit addresses\n");
-        return Op;
-      }
-      if (!IsG16) {
-        LLVM_DEBUG(
-            dbgs() << "Failed to lower image intrinsic: 16 bit addresses "
-                      "need 16 bit derivatives but got 32 bit derivatives\n");
-        return Op;
-      }
-    } else if (!ST->hasG16()) {
-      LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
-                           "support 16 bit derivatives\n");
+
+  bool IsTiedG16 = false;
+  if (BaseOpcode->Gradients && !ST->hasG16()) {
+    // 16 bit gradients are supported, but are tied to the A16 control
+    if (IsA16 && !IsG16) {
+      LLVM_DEBUG(
+          dbgs() << "Failed to lower image intrinsic: 16 bit addresses "
+                    "need 16 bit derivatives but got 32 bit derivatives\n");
       return Op;
     }
+    if (!IsA16 && IsG16) {
+      LLVM_DEBUG(
+          dbgs()
+          << "Failed to lower image intrinsic: 16 bit addresses "
+             "need 16 bit addresses for 16 bit derivatives (got 32 bit)");
+      return Op;
+    }
+    if (IsA16 && IsG16)
+      IsTiedG16 = true;
+  }
 
-    if (BaseOpcode->Gradients && !IsA16) {
-      if (!ST->hasG16()) {
-        LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
-                             "support 16 bit derivatives\n");
-        return Op;
-      }
-      // Activate g16
-      const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
-          AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
-      IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16
+  if (IsA16) {
+    if (!ST->hasA16()) {
+      LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+                           "support 16 bit addresses\n");
+      return Op;
     }
+  }
 
-    // Don't compress addresses for G16
-    const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart);
-    packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs,
-                                ArgOffset + Intr->GradientStart, PackEndIdx,
-                                Intr->NumGradients);
+  // We've dealt with incorrect input so we know that if IsA16, IsG16 or
+  // IsTiedG16 are set then we have to compress/pack operands (either address,
+  // gradient or both)
+  if (BaseOpcode->Gradients && IsG16 && !IsTiedG16) {
+    // Activate g16
+    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
+        AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
+    IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16
+  }
 
-    if (!IsA16) {
-      // Add uncompressed address
-      for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++)
-        VAddrs.push_back(Op.getOperand(I));
-    }
+  // Add gradients (packed or unpacked)
+  if (IsG16) {
+    // Pack the gradients
+    // const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart);
+    packImageA16AddressToDwords(
+        DAG, Op, GradPackVectorVT, VAddrs, ArgOffset + Intr->GradientStart,
+        ArgOffset + Intr->CoordStart, Intr->NumGradients);
+  } else {
+    for (unsigned I = ArgOffset + Intr->GradientStart;
+         I < ArgOffset + Intr->CoordStart; I++)
+      VAddrs.push_back(Op.getOperand(I));
+  }
+
+  // Add addresses (packed or unpacked)
+  if (IsA16) {
+    packImageA16AddressToDwords(DAG, Op, AddrPackVectorVT, VAddrs,
+                                ArgOffset + Intr->CoordStart, VAddrEnd,
+                                0 /* No gradients */);
   } else {
-    for (unsigned I = ArgOffset + Intr->GradientStart; I < VAddrEnd; I++)
+    // Add uncompressed address
+    for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++)
       VAddrs.push_back(Op.getOperand(I));
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -542,7 +542,7 @@
 ;
 ; GFX10-LABEL: sample_d_1d:
 ; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -573,7 +573,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
-; GFX10-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -607,7 +607,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-NEXT:    image_sample_d v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -624,7 +624,7 @@
 ;
 ; GFX10-LABEL: sample_c_d_1d:
 ; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -657,7 +657,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GFX10-NEXT:    image_sample_c_d v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -678,7 +678,7 @@
 ; GFX10:       ; %bb.0: ; %main_body
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -709,7 +709,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
 ; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-NEXT:    image_sample_d_cl v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -730,7 +730,7 @@
 ; GFX10:       ; %bb.0: ; %main_body
 ; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
-; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -763,7 +763,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GFX10-NEXT:    image_sample_c_d_cl v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -780,7 +780,7 @@
 ;
 ; GFX10-LABEL: sample_cd_1d:
 ; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -811,7 +811,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
-; GFX10-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -828,7 +828,7 @@
 ;
 ; GFX10-LABEL: sample_c_cd_1d:
 ; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -861,7 +861,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GFX10-NEXT:    image_sample_c_cd v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -882,7 +882,7 @@
 ; GFX10:       ; %bb.0: ; %main_body
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -913,7 +913,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
 ; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-NEXT:    image_sample_cd_cl v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -934,7 +934,7 @@
 ; GFX10:       ; %bb.0: ; %main_body
 ; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
-; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -967,7 +967,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
 ; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -1163,7 +1163,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
 ; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GFX10-NEXT:    image_sample_c_d_o v0, [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
@@ -1197,7 +1197,7 @@
 ; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
 ; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GFX10-NEXT:    image_sample_c_d_o v[0:1], [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
@@ -0,0 +1,555 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+; Depends on some other changes to pass this test - those are in review separately
+; XFAIL: *
+
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
+; GFX10-LABEL: sample_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
+; GFX10-LABEL: sample_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
+; GFX10-LABEL: sample_d_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v6, 0xffff, v6
+; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
+; GFX10-NEXT:    image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
+; GFX10-LABEL: sample_c_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
+; GFX10-LABEL: sample_c_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
+; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
+; GFX10-LABEL: sample_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
+; GFX10-LABEL: sample_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
+; GFX10-LABEL: sample_c_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
+; GFX10-LABEL: sample_c_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
+; GFX10-NEXT:    image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
+; GFX10-LABEL: sample_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
+; GFX10-LABEL: sample_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
+; GFX10-LABEL: sample_c_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
+; GFX10-LABEL: sample_c_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
+; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
+; GFX10-LABEL: sample_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
+; GFX10-LABEL: sample_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
+; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v6, 0xffff, v6
+; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
+; GFX10-NEXT:    image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v6, 0xffff, v6
+; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
+; GFX10-NEXT:    image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_g16_noa16_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_g16_noa16_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v2, v6, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v6, v0
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
+; GFX10-LABEL: sample_g16_noa16_d_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v3, v9, v3
+; GFX10-NEXT:    v_and_b32_e32 v0, v9, v0
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_g16_noa16_c_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_g16_noa16_c_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v3, v7, v3
+; GFX10-NEXT:    v_and_b32_e32 v1, v7, v1
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
+; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_g16_noa16_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_g16_noa16_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v2, v6, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v6, v0
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_g16_noa16_c_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_g16_noa16_c_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v3, v7, v3
+; GFX10-NEXT:    v_and_b32_e32 v1, v7, v1
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
+; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v4, v9, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v9, v2
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT:    v_and_b32_e32 v4, v9, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v9, v2
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }