Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4240,8 +4240,17 @@
         (I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) ||
         (I >= Intr->CoordStart && !IsA16)) {
       // Handle any gradient or coordinate operands that should not be packed
-      AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
-      PackedAddrs.push_back(AddrReg);
+      if ((I < Intr->GradientStart) && IsA16 &&
+          (B.getMRI()->getType(AddrReg) == S16)) {
+        // Special handling of bias when A16 is on. Bias is of type half but
+        // occupies the full 32 bits.
+        PackedAddrs.push_back(
+            B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
+                .getReg(0));
+      } else {
+        AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
+        PackedAddrs.push_back(AddrReg);
+      }
     } else {
       // Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
       // derivatives dx/dh and dx/dv are packed with undef.
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6209,10 +6209,6 @@
     }
   }

-  // Push back extra arguments.
-  for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++)
-    VAddrs.push_back(Op.getOperand(ArgOffset + I));
-
   // Check for 16 bit addresses or derivatives and pack if true.
   MVT VAddrVT =
       Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
@@ -6225,6 +6221,19 @@
   MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
   IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;

+  // Push back extra arguments.
+  for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
+    if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
+      // Special handling of bias when A16 is on. Bias is of type half but
+      // occupies the full 32 bits.
+      SDValue Bias = DAG.getBuildVector(
+          MVT::v2f16, DL,
+          {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
+      VAddrs.push_back(Bias);
+    } else
+      VAddrs.push_back(Op.getOperand(ArgOffset + I));
+  }
+
   if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {
     // 16 bit gradients are supported, but are tied to the A16 control
     // so both gradients and addresses must be 16 bit
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
@@ -6,7 +6,7 @@
   ; GFX9-LABEL: name: sample_1d
   ; GFX9: bb.1.main_body:
   ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
-  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
   ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
   ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
   ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
@@ -32,7 +32,7 @@
   ; GFX10-LABEL: name: sample_1d
   ; GFX10: bb.1.main_body:
   ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
-  ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
   ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
   ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
   ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
@@ -64,7 +64,7 @@
   ; GFX9-LABEL: name: sample_2d
   ; GFX9: bb.1.main_body:
   ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
-  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
   ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
   ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
   ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
@@ -91,7 +91,7 @@
   ; GFX10-LABEL: name: sample_2d
   ; GFX10: bb.1.main_body:
   ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
-  ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
   ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
   ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
   ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
@@ -124,7 +124,7 @@
   ; GFX9-LABEL: name: sample_3d
   ; GFX9: bb.1.main_body:
   ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
-  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
   ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
   ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
   ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
@@ -155,7 +155,7 @@
   ; GFX10-LABEL: name: sample_3d
   ; GFX10: bb.1.main_body:
   ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
-  ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
   ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
   ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
   ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
@@ -192,7 +192,7 @@
   ; GFX9-LABEL: name: sample_cube
   ; GFX9: bb.1.main_body:
   ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11,
$sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -223,7 +223,7 @@ ; GFX10-LABEL: name: sample_cube ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -260,7 +260,7 @@ ; GFX9-LABEL: name: sample_1darray ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -287,7 +287,7 @@ ; GFX10-LABEL: name: sample_1darray ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -320,7 +320,7 @@ ; GFX9-LABEL: name: sample_2darray ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -351,7 +351,7 @@ ; GFX10-LABEL: name: sample_2darray ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -388,7 +388,7 @@ ; GFX9-LABEL: name: sample_c_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -418,7 +418,7 @@ ; GFX10-LABEL: name: sample_c_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -454,7 +454,7 @@ ; GFX9-LABEL: name: sample_c_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -484,7 +484,7 @@ ; GFX10-LABEL: name: sample_c_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -520,7 +520,7 @@ ; GFX9-LABEL: name: sample_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -547,7 +547,7 @@ ; GFX10-LABEL: name: sample_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -580,7 +580,7 @@ ; GFX9-LABEL: name: sample_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -611,7 +611,7 @@ ; GFX10-LABEL: name: sample_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -648,7 +648,7 @@ ; GFX9-LABEL: name: sample_c_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -678,7 +678,7 @@ ; GFX10-LABEL: name: sample_c_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY 
$sgpr5 @@ -714,7 +714,7 @@ ; GFX9-LABEL: name: sample_c_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -747,7 +747,7 @@ ; GFX10-LABEL: name: sample_c_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -781,11 +781,11 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { +define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) { ; GFX9-LABEL: name: sample_b_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -801,10 +801,10 @@ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -815,7 +815,7 @@ ; GFX10-LABEL: name: sample_b_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ 
-831,10 +831,10 @@ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) @@ -843,15 +843,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { +define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { ; GFX9-LABEL: name: sample_b_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -868,9 +868,10 @@ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -881,7 +882,7 @@ ; GFX10-LABEL: name: sample_b_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -898,9 +899,10 @@ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) @@ -909,15 +911,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { +define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) { ; GFX9-LABEL: name: sample_c_b_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -934,11 +936,11 @@ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -949,7 +951,7 @@ ; GFX10-LABEL: name: sample_c_b_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -966,11 +968,11 @@ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom 
"ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -978,15 +980,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { +define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { ; GFX9-LABEL: name: sample_c_b_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1004,10 +1006,11 @@ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -1018,7 +1021,7 @@ ; GFX10-LABEL: name: sample_c_b_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1036,10 +1039,11 @@ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1047,15 +1051,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { +define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { ; GFX9-LABEL: name: sample_b_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1072,9 +1076,10 @@ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF 
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -1085,7 +1090,7 @@ ; GFX10-LABEL: name: sample_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1102,9 +1107,10 @@ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) @@ -1113,15 +1119,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { ; 
GFX9-LABEL: name: sample_b_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1139,11 +1145,11 @@ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -1154,7 +1160,7 @@ ; GFX10-LABEL: name: sample_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1172,11 +1178,11 @@ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) 
= G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1184,15 +1190,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { +define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { ; GFX9-LABEL: name: sample_c_b_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1210,10 +1216,11 @@ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x 
s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -1224,7 +1231,7 @@ ; GFX10-LABEL: name: sample_c_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1242,10 +1249,11 @@ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1253,15 +1261,15 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { ; GFX9-LABEL: name: sample_c_b_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - 
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1280,12 +1288,12 @@ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -1296,7 +1304,7 @@ ; GFX10-LABEL: name: sample_c_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1315,12 +1323,12 @@ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: 
(dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1328,7 +1336,7 @@ ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } @@ -1336,7 +1344,7 @@ ; GFX9-LABEL: name: sample_d_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1368,7 +1376,7 @@ ; GFX10-LABEL: name: sample_d_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1405,7 +1413,7 @@ ; GFX9-LABEL: name: sample_d_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1439,7 +1447,7 @@ ; GFX10-LABEL: name: sample_d_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1478,7 +1486,7 @@ ; GFX9-LABEL: name: sample_d_3d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1519,7 +1527,7 @@ ; GFX10-LABEL: name: sample_d_3d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1566,7 +1574,7 @@ ; GFX9-LABEL: name: sample_c_d_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1600,7 +1608,7 @@ ; GFX10-LABEL: name: sample_c_d_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1639,7 +1647,7 @@ ; GFX9-LABEL: name: sample_c_d_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1675,7 +1683,7 @@ ; GFX10-LABEL: name: sample_c_d_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1716,7 +1724,7 @@ ; GFX9-LABEL: name: sample_d_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1749,7 +1757,7 @@ ; GFX10-LABEL: name: sample_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1787,7 +1795,7 @@ ; GFX9-LABEL: name: sample_d_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1824,7 +1832,7 @@ ; GFX10-LABEL: name: sample_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1866,7 +1874,7 @@ ; GFX9-LABEL: name: sample_c_d_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1901,7 +1909,7 @@ ; GFX10-LABEL: name: sample_c_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1941,7 +1949,7 @@ ; GFX9-LABEL: name: sample_c_d_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -1980,7 +1988,7 @@ ; GFX10-LABEL: name: sample_c_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2024,7 +2032,7 @@ ; GFX9-LABEL: name: sample_cd_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2056,7 +2064,7 @@ ; GFX10-LABEL: name: sample_cd_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2093,7 +2101,7 @@ ; GFX9-LABEL: name: sample_cd_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2127,7 +2135,7 @@ ; GFX10-LABEL: name: sample_cd_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2166,7 +2174,7 @@ ; GFX9-LABEL: name: sample_c_cd_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2200,7 +2208,7 @@ ; GFX10-LABEL: name: sample_c_cd_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2239,7 +2247,7 @@ ; GFX9-LABEL: name: sample_c_cd_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2275,7 +2283,7 @@ ; GFX10-LABEL: name: sample_c_cd_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2316,7 +2324,7 @@ ; GFX9-LABEL: name: sample_cd_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2349,7 +2357,7 @@ ; GFX10-LABEL: name: sample_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2387,7 +2395,7 @@ ; GFX9-LABEL: name: sample_cd_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2424,7 +2432,7 @@ ; GFX10-LABEL: name: sample_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2466,7 +2474,7 @@ ; GFX9-LABEL: name: sample_c_cd_cl_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2501,7 +2509,7 @@ ; GFX10-LABEL: name: sample_c_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2541,7 +2549,7 @@ ; GFX9-LABEL: name: sample_c_cd_cl_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2580,7 +2588,7 @@ ; GFX10-LABEL: name: sample_c_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2624,7 +2632,7 @@ ; GFX9-LABEL: name: sample_l_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2651,7 +2659,7 @@ ; GFX10-LABEL: name: sample_l_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2684,7 +2692,7 @@ ; GFX9-LABEL: name: sample_l_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2715,7 +2723,7 @@ ; GFX10-LABEL: name: sample_l_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2752,7 +2760,7 @@ ; GFX9-LABEL: name: sample_c_l_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2782,7 +2790,7 @@ ; GFX10-LABEL: name: sample_c_l_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2818,7 +2826,7 @@ ; GFX9-LABEL: name: sample_c_l_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; 
GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2851,7 +2859,7 @@ ; GFX10-LABEL: name: sample_c_l_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2889,7 +2897,7 @@ ; GFX9-LABEL: name: sample_lz_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2915,7 +2923,7 @@ ; GFX10-LABEL: name: sample_lz_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2947,7 +2955,7 @@ ; GFX9-LABEL: name: sample_lz_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -2974,7 +2982,7 @@ ; GFX10-LABEL: name: sample_lz_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3007,7 +3015,7 @@ ; GFX9-LABEL: name: sample_c_lz_1d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3037,7 +3045,7 @@ ; GFX10-LABEL: name: sample_c_lz_1d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3073,7 +3081,7 @@ ; GFX9-LABEL: name: sample_c_lz_2d ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3103,7 +3111,7 @@ ; GFX10-LABEL: name: sample_c_lz_2d ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3139,7 +3147,7 @@ ; GFX9-LABEL: name: sample_c_d_o_2darray_V1 ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3176,7 +3184,7 @@ ; GFX10-LABEL: name: sample_c_d_o_2darray_V1 ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3219,7 +3227,7 @@ ; GFX9-LABEL: name: sample_c_d_o_2darray_V2 ; GFX9: bb.1.main_body: ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3258,7 +3266,7 @@ ; GFX10-LABEL: name: sample_c_d_o_2darray_V2 ; GFX10: bb.1.main_body: ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 @@ -3314,14 +3322,14 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, 
float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll @@ -334,25 +334,27 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { +define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { ; GFX9-LABEL: gather4_b_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[14:15], exec ; GFX9-NEXT: s_mov_b32 s0, s2 ; GFX9-NEXT: s_wqm_b64 exec, exec +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s8, s10 +; GFX9-NEXT: s_mov_b32 s10, s12 ; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX9-NEXT: s_lshl_b32 s12, s0, 16 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 ; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 ; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 ; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 ; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 ; GFX9-NEXT: s_mov_b32 s11, s13 +; GFX9-NEXT: v_and_or_b32 v0, v0, v3, s12 ; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15] ; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 @@ -364,47 +366,52 @@ ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10NSA-NEXT: 
v_mov_b32_e32 v3, 0xffff ; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX10NSA-NEXT: s_mov_b32 s1, s3 ; GFX10NSA-NEXT: s_mov_b32 s2, s4 -; GFX10NSA-NEXT: s_mov_b32 s3, s5 ; GFX10NSA-NEXT: s_mov_b32 s4, s6 -; GFX10NSA-NEXT: s_mov_b32 s5, s7 ; GFX10NSA-NEXT: s_mov_b32 s6, s8 -; GFX10NSA-NEXT: s_mov_b32 s7, s9 ; GFX10NSA-NEXT: s_mov_b32 s8, s10 -; GFX10NSA-NEXT: s_mov_b32 s9, s11 ; GFX10NSA-NEXT: s_mov_b32 s10, s12 +; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16 +; GFX10NSA-NEXT: s_mov_b32 s1, s3 +; GFX10NSA-NEXT: s_mov_b32 s3, s5 +; GFX10NSA-NEXT: s_mov_b32 s5, s7 +; GFX10NSA-NEXT: s_mov_b32 s7, s9 +; GFX10NSA-NEXT: s_mov_b32 s9, s11 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 -; GFX10NSA-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 +; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, s12 +; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v3, v2 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) ; GFX10NSA-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { +define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { ; GFX9-LABEL: gather4_c_b_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[14:15], exec ; GFX9-NEXT: s_mov_b32 s0, s2 ; GFX9-NEXT: s_wqm_b64 exec, exec +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s4, s6 +; GFX9-NEXT: s_mov_b32 s6, s8 +; GFX9-NEXT: s_mov_b32 s8, s10 +; GFX9-NEXT: s_mov_b32 s10, s12 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: s_lshl_b32 s12, s0, 16 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 ; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 ; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 ; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 ; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 ; GFX9-NEXT: s_mov_b32 s11, s13 +; GFX9-NEXT: v_and_or_b32 v0, v0, v4, s12 ; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15] ; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16 @@ -416,29 +423,32 @@ ; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo ; GFX10NSA-NEXT: s_mov_b32 s0, s2 ; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10NSA-NEXT: v_mov_b32_e32 v4, 0xffff ; GFX10NSA-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10NSA-NEXT: s_mov_b32 s1, s3 ; GFX10NSA-NEXT: s_mov_b32 s2, s4 -; GFX10NSA-NEXT: s_mov_b32 s3, s5 ; GFX10NSA-NEXT: s_mov_b32 s4, s6 -; GFX10NSA-NEXT: s_mov_b32 s5, s7 ; GFX10NSA-NEXT: s_mov_b32 s6, s8 -; GFX10NSA-NEXT: s_mov_b32 s7, s9 ; GFX10NSA-NEXT: s_mov_b32 s8, s10 -; GFX10NSA-NEXT: s_mov_b32 s9, s11 ; GFX10NSA-NEXT: s_mov_b32 s10, s12 +; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16 +; GFX10NSA-NEXT: s_mov_b32 s1, s3 +; GFX10NSA-NEXT: s_mov_b32 s3, s5 +; GFX10NSA-NEXT: s_mov_b32 s5, s7 +; GFX10NSA-NEXT: s_mov_b32 s7, s9 +; GFX10NSA-NEXT: s_mov_b32 s9, s11 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 -; GFX10NSA-NEXT: v_and_or_b32 v2, 0xffff, v2, 
v3 +; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v4, s12 +; GFX10NSA-NEXT: v_and_or_b32 v2, v2, v4, v3 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 ; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) ; GFX10NSA-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { ; GFX9-LABEL: gather4_b_cl_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[14:15], exec @@ -450,14 +460,15 @@ ; GFX9-NEXT: s_mov_b32 s8, s10 ; GFX9-NEXT: s_mov_b32 s10, s12 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX9-NEXT: s_lshl_b32 s12, s0, 16 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX9-NEXT: s_mov_b32 s1, s3 ; GFX9-NEXT: s_mov_b32 s3, s5 ; GFX9-NEXT: s_mov_b32 s5, s7 ; GFX9-NEXT: s_mov_b32 s7, s9 ; GFX9-NEXT: s_mov_b32 s9, s11 ; GFX9-NEXT: s_mov_b32 s11, s13 +; GFX9-NEXT: v_and_or_b32 v0, v0, v4, s12 ; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2 ; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s12 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15] @@ -484,6 +495,7 @@ ; GFX10NSA-NEXT: s_mov_b32 s7, s9 ; GFX10NSA-NEXT: s_mov_b32 s9, s11 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 +; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v4, s12 ; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v4, v2 ; GFX10NSA-NEXT: v_and_or_b32 v2, v3, v4, s12 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 @@ -491,11 +503,11 @@ ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) ; GFX10NSA-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { ; GFX9-LABEL: gather4_c_b_cl_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[14:15], exec @@ -507,14 +519,15 @@ ; GFX9-NEXT: s_mov_b32 s8, s10 ; GFX9-NEXT: s_mov_b32 s10, s12 ; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NEXT: s_lshl_b32 s12, s0, 16 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NEXT: s_mov_b32 s1, s3 ; GFX9-NEXT: s_mov_b32 s3, s5 ; GFX9-NEXT: s_mov_b32 s5, s7 ; GFX9-NEXT: s_mov_b32 s7, s9 ; GFX9-NEXT: s_mov_b32 s9, s11 ; GFX9-NEXT: s_mov_b32 s11, s13 +; GFX9-NEXT: v_and_or_b32 v0, v0, v5, s12 ; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 ; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s12 ; GFX9-NEXT: s_and_b64 exec, exec, s[14:15] @@ -541,6 +554,7 @@ ; 
GFX10NSA-NEXT: s_mov_b32 s7, s9 ; GFX10NSA-NEXT: s_mov_b32 s9, s11 ; GFX10NSA-NEXT: s_mov_b32 s11, s13 +; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v5, s12 ; GFX10NSA-NEXT: v_and_or_b32 v2, v2, v5, v3 ; GFX10NSA-NEXT: v_and_or_b32 v3, v4, v5, s12 ; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 @@ -548,7 +562,7 @@ ; GFX10NSA-NEXT: s_waitcnt vmcnt(0) ; GFX10NSA-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret <4 x float> %v } @@ -750,10 +764,10 @@ declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 immarg, float, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 immarg, float, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 +declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 immarg, half, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll @@ -166,7 +166,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { +define amdgpu_ps <4 x 
float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { ; GFX9-LABEL: gather4_b_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -189,11 +189,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { +define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { ; GFX9-LABEL: gather4_c_b_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -216,11 +216,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { ; GFX9-LABEL: gather4_b_cl_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -245,11 +245,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { ; GFX9-LABEL: gather4_c_b_cl_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -275,7 +275,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } @@ -373,10 +373,10 @@ declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, 
half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll @@ -320,7 +320,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { +define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) { ; GFX9-LABEL: sample_b_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -339,11 +339,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { +define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { ; GFX9-LABEL: sample_b_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -366,11 +366,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { +define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) { ; 
GFX9-LABEL: sample_c_b_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -389,11 +389,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { +define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { ; GFX9-LABEL: sample_c_b_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -416,11 +416,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { +define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { ; GFX9-LABEL: sample_b_cl_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -443,11 +443,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { ; GFX9-LABEL: sample_b_cl_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -472,11 +472,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { +define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { ; GFX9-LABEL: sample_c_b_cl_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -499,11 +499,11 @@ ; 
GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { ; GFX9-LABEL: sample_c_b_cl_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_mov_b64 s[12:13], exec @@ -529,7 +529,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v } @@ -1229,14 +1229,14 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, 
i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1