Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -23,8 +23,6 @@
 class SITargetLowering : public AMDGPUTargetLowering {
   SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
                          SDValue Chain, unsigned Offset, bool Signed) const;
-  SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
-                               SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                              SelectionDAG &DAG) const override;
 
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1366,14 +1366,6 @@
     return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
                                    Op->getVTList(), Ops, VT, MMO);
   }
-  case AMDGPUIntrinsic::SI_sample:
-    return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
-  case AMDGPUIntrinsic::SI_sampleb:
-    return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
-  case AMDGPUIntrinsic::SI_sampled:
-    return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
-  case AMDGPUIntrinsic::SI_samplel:
-    return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
   case AMDGPUIntrinsic::SI_vs_load_input:
     return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
                        Op.getOperand(1),
@@ -1555,15 +1547,6 @@
   return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
 }
 
-SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
-                                               const SDValue &Op,
-                                               SelectionDAG &DAG) const {
-  return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
-                     Op.getOperand(2),
-                     Op.getOperand(3),
-                     Op.getOperand(4));
-}
-
 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   if (Op.getValueType() != MVT::i64)
     return SDValue();
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -2442,38 +2442,6 @@
     (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
 >;
 
-multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
-  def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
-  def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
-}
-
-multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
-  def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
-  def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
-}
-
-defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
-defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
-
-defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
-defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
-
-/* Image resource information */
-def : Pat <
-  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
-  (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-def : Pat <
-  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
-  (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-def : Pat <
-  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
-  (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
 /********** ============================================ **********/
 /********** Extraction, Insertion, Building and Casting  **********/
 /********** ============================================ **********/
Index: lib/Target/AMDGPU/SIIntrinsics.td
===================================================================
--- lib/Target/AMDGPU/SIIntrinsics.td
+++ lib/Target/AMDGPU/SIIntrinsics.td
@@ -172,16 +172,6 @@
   def int_SI_image_load_mip : Image;
   def int_SI_getresinfo : Image;
 
-  // Deprecated image and sample intrinsics.
-  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-
-  def int_SI_sample : Sample;
-  def int_SI_sampleb : Sample;
-  def int_SI_sampled : Sample;
-  def int_SI_samplel : Sample;
-  def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
-
   /* Interpolation Intrinsics */
 
   def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
Index: test/CodeGen/AMDGPU/commute-shifts.ll
===================================================================
--- test/CodeGen/AMDGPU/commute-shifts.ll
+++ test/CodeGen/AMDGPU/commute-shifts.ll
@@ -4,29 +4,25 @@
 ; GCN-LABEL: {{^}}main:
 ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
-
 define void @main() #0 {
-main_body:
-  %0 = fptosi float undef to i32
-  %1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
-  %2 = extractelement <4 x i32> %1, i32 0
-  %3 = and i32 %0, 7
-  %4 = shl i32 1, %3
-  %5 = and i32 %2, %4
-  %6 = icmp eq i32 %5, 0
-  %.10 = select i1 %6, float 0.000000e+00, float undef
-  %7 = call i32 @llvm.SI.packf16(float undef, float %.10)
-  %8 = bitcast i32 %7 to float
-  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
+bb:
+  %tmp = fptosi float undef to i32
+  %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp2.f = extractelement <4 x float> %tmp1, i32 0
+  %tmp2 = bitcast float %tmp2.f to i32
+  %tmp3 = and i32 %tmp, 7
+  %tmp4 = shl i32 1, %tmp3
+  %tmp5 = and i32 %tmp2, %tmp4
+  %tmp6 = icmp eq i32 %tmp5, 0
+  %tmp7 = select i1 %tmp6, float 0.000000e+00, float undef
+  %tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
+  %tmp9 = bitcast i32 %tmp8 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
   ret void
 }
 
-; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
 declare i32 @llvm.SI.packf16(float, float) #1
-
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
 
 attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
Index: test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll
@@ -0,0 +1,96 @@
+;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
+
+; CHECK-LABEL: {{^}}v1:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
+define void @v1(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 2
+  %4 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}v2:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
+define void @v2(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 1
+  %4 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}v3:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
+define void @v3(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 1
+  %3 = extractelement <4 x float> %1, i32 2
+  %4 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}v4:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
+define void @v4(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 1
+  %4 = extractelement <4 x float> %1, i32 2
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}v5:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
+define void @v5(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 1
+  %3 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}v6:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
+define void @v6(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 1
+  %3 = extractelement <4 x float> %1, i32 2
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}v7:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
+define void @v7(i32 %a1) #0 {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
+  ret void
+}
+
+declare <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
Index: test/CodeGen/AMDGPU/llvm.SI.imageload.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.imageload.ll
+++ /dev/null
@@ -1,132 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
-   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
-   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
-   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
-   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
-   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
-   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
-   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
-   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
-   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
-   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
-   %res1 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v1,
-      <32 x i8> undef, i32 1)
-   %res2 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v2,
-      <32 x i8> undef, i32 2)
-   %res3 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v3,
-      <32 x i8> undef, i32 3)
-   %res4 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v4,
-      <32 x i8> undef, i32 4)
-   %res5 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v5,
-      <32 x i8> undef, i32 5)
-   %res6 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v6,
-      <32 x i8> undef, i32 6)
-   %res10 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v10,
-      <32 x i8> undef, i32 10)
-   %res11 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v11,
-      <32 x i8> undef, i32 11)
-   %res15 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v15,
-      <32 x i8> undef, i32 15)
-   %res16 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v16,
-      <32 x i8> undef, i32 16)
-   %e1 = extractelement <4 x i32> %res1, i32 0
-   %e2 = extractelement <4 x i32> %res2, i32 1
-   %e3 = extractelement <4 x i32> %res3, i32 2
-   %e4 = extractelement <4 x i32> %res4, i32 3
-   %t0 = extractelement <4 x i32> %res5, i32 0
-   %t1 = extractelement <4 x i32> %res5, i32 1
-   %e5 = add i32 %t0, %t1
-   %t2 = extractelement <4 x i32> %res6, i32 0
-   %t3 = extractelement <4 x i32> %res6, i32 2
-   %e6 = add i32 %t2, %t3
-   %t10 = extractelement <4 x i32> %res10, i32 2
-   %t11 = extractelement <4 x i32> %res10, i32 3
-   %e10 = add i32 %t10, %t11
-   %t12 = extractelement <4 x i32> %res11, i32 0
-   %t13 = extractelement <4 x i32> %res11, i32 1
-   %t14 = extractelement <4 x i32> %res11, i32 2
-   %t15 = add i32 %t12, %t13
-   %e11 = add i32 %t14, %t15
-   %t28 = extractelement <4 x i32> %res15, i32 0
-   %t29 = extractelement <4 x i32> %res15, i32 1
-   %t30 = extractelement <4 x i32> %res15, i32 2
-   %t31 = extractelement <4 x i32> %res15, i32 3
-   %t32 = add i32 %t28, %t29
-   %t33 = add i32 %t30, %t31
-   %e15 = add i32 %t32, %t33
-   %e16 = extractelement <4 x i32> %res16, i32 3
-   %s1 = add i32 %e1, %e2
-   %s2 = add i32 %s1, %e3
-   %s3 = add i32 %s2, %e4
-   %s4 = add i32 %s3, %e5
-   %s5 = add i32 %s4, %e6
-   %s9 = add i32 %s5, %e10
-   %s10 = add i32 %s9, %e11
-   %s14 = add i32 %s10, %e15
-   %s15 = add i32 %s14, %e16
-   %s16 = bitcast i32 %s15 to float
-   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
-   ret void
-}
-
-; Test that ccordinates are stored in vgprs and not sgprs
-; CHECK: vgpr_coords
-; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
-define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
-  %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
-  %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
-  %22 = getelementptr float, float addrspace(2)* %21, i32 0
-  %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
-  %24 = getelementptr float, float addrspace(2)* %21, i32 1
-  %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
-  %26 = getelementptr float, float addrspace(2)* %21, i32 4
-  %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
-  %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
-  %30 = bitcast float %27 to i32
-  %31 = bitcast float %23 to i32
-  %32 = bitcast float %25 to i32
-  %33 = insertelement <4 x i32> undef, i32 %31, i32 0
-  %34 = insertelement <4 x i32> %33, i32 %32, i32 1
-  %35 = insertelement <4 x i32> %34, i32 %30, i32 2
-  %36 = insertelement <4 x i32> %35, i32 undef, i32 3
-  %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2)
-  %38 = extractelement <4 x i32> %37, i32 0
-  %39 = extractelement <4 x i32> %37, i32 1
-  %40 = extractelement <4 x i32> %37, i32 2
-  %41 = extractelement <4 x i32> %37, i32 3
-  %42 = bitcast i32 %38 to float
-  %43 = bitcast i32 %39 to float
-  %44 = bitcast i32 %40 to float
-  %45 = bitcast i32 %41 to float
-  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45)
-  ret void
-}
-
-declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <32 x i8>, i32) readnone
-; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-
-!0 = !{!"const", null}
-!1 = !{}
-!2 = !{!0, !0, i64 0, i32 1}
Index: test/CodeGen/AMDGPU/llvm.SI.resinfo.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.resinfo.ll
+++ /dev/null
@@ -1,111 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
-		  i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
-   %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <32 x i8> undef, i32 1)
-   %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <32 x i8> undef, i32 2)
-   %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <32 x i8> undef, i32 3)
-   %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <32 x i8> undef, i32 4)
-   %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <32 x i8> undef, i32 5)
-   %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <32 x i8> undef, i32 6)
-   %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <32 x i8> undef, i32 7)
-   %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <32 x i8> undef, i32 8)
-   %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <32 x i8> undef, i32 9)
-   %res10 = call <4 x i32> @llvm.SI.resinfo(i32 %a10, <32 x i8> undef, i32 10)
-   %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <32 x i8> undef, i32 11)
-   %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <32 x i8> undef, i32 12)
-   %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <32 x i8> undef, i32 13)
-   %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <32 x i8> undef, i32 14)
-   %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <32 x i8> undef, i32 15)
-   %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <32 x i8> undef, i32 16)
-   %e1 = extractelement <4 x i32> %res1, i32 0
-   %e2 = extractelement <4 x i32> %res2, i32 1
-   %e3 = extractelement <4 x i32> %res3, i32 2
-   %e4 = extractelement <4 x i32> %res4, i32 3
-   %t0 = extractelement <4 x i32> %res5, i32 0
-   %t1 = extractelement <4 x i32> %res5, i32 1
-   %e5 = add i32 %t0, %t1
-   %t2 = extractelement <4 x i32> %res6, i32 0
-   %t3 = extractelement <4 x i32> %res6, i32 2
-   %e6 = add i32 %t2, %t3
-   %t4 = extractelement <4 x i32> %res7, i32 0
-   %t5 = extractelement <4 x i32> %res7, i32 3
-   %e7 = add i32 %t4, %t5
-   %t6 = extractelement <4 x i32> %res8, i32 1
-   %t7 = extractelement <4 x i32> %res8, i32 2
-   %e8 = add i32 %t6, %t7
-   %t8 = extractelement <4 x i32> %res9, i32 1
-   %t9 = extractelement <4 x i32> %res9, i32 3
-   %e9 = add i32 %t8, %t9
-   %t10 = extractelement <4 x i32> %res10, i32 2
-   %t11 = extractelement <4 x i32> %res10, i32 3
-   %e10 = add i32 %t10, %t11
-   %t12 = extractelement <4 x i32> %res11, i32 0
-   %t13 = extractelement <4 x i32> %res11, i32 1
-   %t14 = extractelement <4 x i32> %res11, i32 2
-   %t15 = add i32 %t12, %t13
-   %e11 = add i32 %t14, %t15
-   %t16 = extractelement <4 x i32> %res12, i32 0
-   %t17 = extractelement <4 x i32> %res12, i32 1
-   %t18 = extractelement <4 x i32> %res12, i32 3
-   %t19 = add i32 %t16, %t17
-   %e12 = add i32 %t18, %t19
-   %t20 = extractelement <4 x i32> %res13, i32 0
-   %t21 = extractelement <4 x i32> %res13, i32 2
-   %t22 = extractelement <4 x i32> %res13, i32 3
-   %t23 = add i32 %t20, %t21
-   %e13 = add i32 %t22, %t23
-   %t24 = extractelement <4 x i32> %res14, i32 1
-   %t25 = extractelement <4 x i32> %res14, i32 2
-   %t26 = extractelement <4 x i32> %res14, i32 3
-   %t27 = add i32 %t24, %t25
-   %e14 = add i32 %t26, %t27
-   %t28 = extractelement <4 x i32> %res15, i32 0
-   %t29 = extractelement <4 x i32> %res15, i32 1
-   %t30 = extractelement <4 x i32> %res15, i32 2
-   %t31 = extractelement <4 x i32> %res15, i32 3
-   %t32 = add i32 %t28, %t29
-   %t33 = add i32 %t30, %t31
-   %e15 = add i32 %t32, %t33
-   %e16 = extractelement <4 x i32> %res16, i32 3
-   %s1 = add i32 %e1, %e2
-   %s2 = add i32 %s1, %e3
-   %s3 = add i32 %s2, %e4
-   %s4 = add i32 %s3, %e5
-   %s5 = add i32 %s4, %e6
-   %s6 = add i32 %s5, %e7
-   %s7 = add i32 %s6, %e8
-   %s8 = add i32 %s7, %e9
-   %s9 = add i32 %s8, %e10
-   %s10 = add i32 %s9, %e11
-   %s11 = add i32 %s10, %e12
-   %s12 = add i32 %s11, %e13
-   %s13 = add i32 %s12, %e14
-   %s14 = add i32 %s13, %e15
-   %s15 = add i32 %s14, %e16
-   %s16 = bitcast i32 %s15 to float
-   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
-   ret void
-}
-
-declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Index: test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll
+++ /dev/null
@@ -1,96 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
-
-; CHECK-LABEL: {{^}}v1:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
-define void @v1(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 0
-  %3 = extractelement <4 x float> %1, i32 2
-  %4 = extractelement <4 x float> %1, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
-  ret void
-}
-
-; CHECK-LABEL: {{^}}v2:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
-define void @v2(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 0
-  %3 = extractelement <4 x float> %1, i32 1
-  %4 = extractelement <4 x float> %1, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
-  ret void
-}
-
-; CHECK-LABEL: {{^}}v3:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
-define void @v3(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 1
-  %3 = extractelement <4 x float> %1, i32 2
-  %4 = extractelement <4 x float> %1, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
-  ret void
-}
-
-; CHECK-LABEL: {{^}}v4:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
-define void @v4(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 0
-  %3 = extractelement <4 x float> %1, i32 1
-  %4 = extractelement <4 x float> %1, i32 2
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
-  ret void
-}
-
-; CHECK-LABEL: {{^}}v5:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
-define void @v5(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 1
-  %3 = extractelement <4 x float> %1, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
-  ret void
-}
-
-; CHECK-LABEL: {{^}}v6:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
-define void @v6(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 1
-  %3 = extractelement <4 x float> %1, i32 2
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
-  ret void
-}
-
-; CHECK-LABEL: {{^}}v7:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
-define void @v7(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 0
-  %3 = extractelement <4 x float> %1, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
-  ret void
-}
-
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Index: test/CodeGen/AMDGPU/llvm.SI.sample.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.sample.ll
+++ /dev/null
@@ -1,160 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: image_sample {{v[0-9]+}}, 2
-;CHECK-DAG: image_sample {{v[0-9]+}}, 1
-;CHECK-DAG: image_sample {{v[0-9]+}}, 4
-;CHECK-DAG: image_sample {{v[0-9]+}}, 8
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: image_sample {{v[0-9]+}}, 8
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
-   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
-   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
-   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
-   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
-   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
-   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
-   %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
-   %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
-   %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
-   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
-   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
-   %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
-   %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
-   %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
-   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
-   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
-   %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
-      <32 x i8> undef, <16 x i8> undef, i32 1)
-   %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
-      <32 x i8> undef, <16 x i8> undef, i32 2)
-   %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
-      <32 x i8> undef, <16 x i8> undef, i32 3)
-   %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
-      <32 x i8> undef, <16 x i8> undef, i32 4)
-   %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
-      <32 x i8> undef, <16 x i8> undef, i32 5)
-   %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
-      <32 x i8> undef, <16 x i8> undef, i32 6)
-   %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
-      <32 x i8> undef, <16 x i8> undef, i32 7)
-   %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
-      <32 x i8> undef, <16 x i8> undef, i32 8)
-   %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
-      <32 x i8> undef, <16 x i8> undef, i32 9)
-   %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
-      <32 x i8> undef, <16 x i8> undef, i32 10)
-   %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
-      <32 x i8> undef, <16 x i8> undef, i32 11)
-   %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
-      <32 x i8> undef, <16 x i8> undef, i32 12)
-   %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
-      <32 x i8> undef, <16 x i8> undef, i32 13)
-   %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
-      <32 x i8> undef, <16 x i8> undef, i32 14)
-   %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
-      <32 x i8> undef, <16 x i8> undef, i32 15)
-   %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
-      <32 x i8> undef, <16 x i8> undef, i32 16)
-   %e1 = extractelement <4 x float> %res1, i32 0
-   %e2 = extractelement <4 x float> %res2, i32 1
-   %e3 = extractelement <4 x float> %res3, i32 2
-   %e4 = extractelement <4 x float> %res4, i32 3
-   %t0 = extractelement <4 x float> %res5, i32 0
-   %t1 = extractelement <4 x float> %res5, i32 1
-   %e5 = fadd float %t0, %t1
-   %t2 = extractelement <4 x float> %res6, i32 0
-   %t3 = extractelement <4 x float> %res6, i32 2
-   %e6 = fadd float %t2, %t3
-   %t4 = extractelement <4 x float> %res7, i32 0
-   %t5 = extractelement <4 x float> %res7, i32 3
-   %e7 = fadd float %t4, %t5
-   %t6 = extractelement <4 x float> %res8, i32 1
-   %t7 = extractelement <4 x float> %res8, i32 2
-   %e8 = fadd float %t6, %t7
-   %t8 = extractelement <4 x float> %res9, i32 1
-   %t9 = extractelement <4 x float> %res9, i32 3
-   %e9 = fadd float %t8, %t9
-   %t10 = extractelement <4 x float> %res10, i32 2
-   %t11 = extractelement <4 x float> %res10, i32 3
-   %e10 = fadd float %t10, %t11
-   %t12 = extractelement <4 x float> %res11, i32 0
-   %t13 = extractelement <4 x float> %res11, i32 1
-   %t14 = extractelement <4 x float> %res11, i32 2
-   %t15 = fadd float %t12, %t13
-   %e11 = fadd float %t14, %t15
-   %t16 = extractelement <4 x float> %res12, i32 0
-   %t17 = extractelement <4 x float> %res12, i32 1
-   %t18 = extractelement <4 x float> %res12, i32 3
-   %t19 = fadd float %t16, %t17
-   %e12 = fadd float %t18, %t19
-   %t20 = extractelement <4 x float> %res13, i32 0
-   %t21 = extractelement <4 x float> %res13, i32 2
-   %t22 = extractelement <4 x float> %res13, i32 3
-   %t23 = fadd float %t20, %t21
-   %e13 = fadd float %t22, %t23
-   %t24 = extractelement <4 x float> %res14, i32 1
-   %t25 = extractelement <4 x float> %res14, i32 2
-   %t26 = extractelement <4 x float> %res14, i32 3
-   %t27 = fadd float %t24, %t25
-   %e14 = fadd float %t26, %t27
-   %t28 = extractelement <4 x float> %res15, i32 0
-   %t29 = extractelement <4 x float> %res15, i32 1
-   %t30 = extractelement <4 x float> %res15, i32 2
-   %t31 = extractelement <4 x float> %res15, i32 3
-   %t32 = fadd float %t28, %t29
-   %t33 = fadd float %t30, %t31
-   %e15 = fadd float %t32, %t33
-   %e16 = extractelement <4 x float> %res16, i32 3
-   %s1 = fadd float %e1, %e2
-   %s2 = fadd float %s1, %e3
-   %s3 = fadd float %s2, %e4
-   %s4 = fadd float %s3, %e5
-   %s5 = fadd float %s4, %e6
-   %s6 = fadd float %s5, %e7
-   %s7 = fadd float %s6, %e8
-   %s8 = fadd float %s7, %e9
-   %s9 = fadd float %s8, %e10
-   %s10 = fadd float %s9, %e11
-   %s11 = fadd float %s10, %e12
-   %s12 = fadd float %s11, %e13
-   %s13 = fadd float %s12, %e14
-   %s14 = fadd float %s13, %e15
-   %s15 = fadd float %s14, %e16
-   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
-   ret void
-}
-
-; CHECK: {{^}}v1:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
-define void @v1(i32 %a1) #0 {
-entry:
-  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
-  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
-  %2 = extractelement <4 x float> %1, i32 0
-  %3 = extractelement <4 x float> %1, i32 1
-  %4 = extractelement <4 x float> %1, i32 2
-  %5 = extractelement <4 x float> %1, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
-  ret void
-}
-
-
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Index: test/CodeGen/AMDGPU/llvm.SI.sampled.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.sampled.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
-   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
-   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
-   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
-   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
-   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
-   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
-   %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
-   %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
-   %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
-   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
-   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
-   %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
-   %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
-   %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
-   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
-   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
-   %res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1,
-      <32 x i8> undef, <16 x i8> undef, i32 1)
-   %res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2,
-      <32 x i8> undef, <16 x i8> undef, i32 2)
-   %res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3,
-      <32 x i8> undef, <16 x i8> undef, i32 3)
-   %res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4,
-      <32 x i8> undef, <16 x i8> undef, i32 4)
-   %res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5,
-      <32 x i8> undef, <16 x i8> undef, i32 5)
-   %res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6,
-      <32 x i8> undef, <16 x i8> undef, i32 6)
-   %res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7,
-      <32 x i8> undef, <16 x i8> undef, i32 7)
-   %res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8,
-      <32 x i8> undef, <16 x i8> undef, i32 8)
-   %res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9,
-      <32 x i8> undef, <16 x i8> undef, i32 9)
-   %res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10,
-      <32 x i8> undef, <16 x i8> undef, i32 10)
-   %res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11,
-      <32 x i8> undef, <16 x i8> undef, i32 11)
-   %res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12,
-      <32 x i8> undef, <16 x i8> undef, i32 12)
-   %res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13,
-      <32 x i8> undef, <16 x i8> undef, i32 13)
-   %res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14,
-      <32 x i8> undef, <16 x i8> undef, i32 14)
-   %res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15,
-      <32 x i8> undef, <16 x i8> undef, i32 15)
-   %res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16,
-      <32 x i8> undef, <16 x i8> undef, i32 16)
-   %e1 = extractelement <4 x float> %res1, i32 0
-   %e2 = extractelement <4 x float> %res2, i32 1
-   %e3 = extractelement <4 x float> %res3, i32 2
-   %e4 = extractelement <4 x float> %res4, i32 3
-   %t0 = extractelement <4 x float> %res5, i32 0
-   %t1 = extractelement <4 x float> %res5, i32 1
-   %e5 = fadd float %t0, %t1
-   %t2 = extractelement <4 x float> %res6, i32 0
-   %t3 = extractelement <4 x float> %res6, i32 2
-   %e6 = fadd float %t2, %t3
-   %t4 = extractelement <4 x float> %res7, i32 0
-   %t5 = extractelement <4 x float> %res7, i32 3
-   %e7 = fadd float %t4, %t5
-   %t6 = extractelement <4 x float> %res8, i32 1
-   %t7 = extractelement <4 x float> %res8, i32 2
-   %e8 = fadd float %t6, %t7
-   %t8 = extractelement <4 x float> %res9, i32 1
-   %t9 = extractelement <4 x float> %res9, i32 3
-   %e9 = fadd float %t8, %t9
-   %t10 = extractelement <4 x float> %res10, i32 2
-   %t11 = extractelement <4 x float> %res10, i32 3
-   %e10 = fadd float %t10, %t11
-   %t12 = extractelement <4 x float> %res11, i32 0
-   %t13 = extractelement <4 x float> %res11, i32 1
-   %t14 = extractelement <4 x float> %res11, i32 2
-   %t15 = fadd float %t12, %t13
-   %e11 = fadd float %t14, %t15
-   %t16 = extractelement <4 x float> %res12, i32 0
-   %t17 = extractelement <4 x float> %res12, i32 1
-   %t18 = extractelement <4 x float> %res12, i32 3
-   %t19 = fadd float %t16, %t17
-   %e12 = fadd float %t18, %t19
-   %t20 = extractelement <4 x float> %res13, i32 0
-   %t21 = extractelement <4 x float> %res13, i32 2
-   %t22 = extractelement <4 x float> %res13, i32 3
-   %t23 = fadd float %t20, %t21
-   %e13 = fadd float %t22, %t23
-   %t24 = extractelement <4 x float> %res14, i32 1
-   %t25 = extractelement <4 x float> %res14, i32 2
-   %t26 = extractelement <4 x float> %res14, i32 3
-   %t27 = fadd float %t24, %t25
-   %e14 = fadd float %t26, %t27
-   %t28 = extractelement <4 x float> %res15, i32 0
-   %t29 = extractelement <4 x float> %res15, i32 1
-   %t30 = extractelement <4 x float> %res15, i32 2
-   %t31 = extractelement <4 x float> %res15, i32 3
-   %t32 = fadd float %t28, %t29
-   %t33 = fadd float %t30, %t31
-   %e15 = fadd float %t32, %t33
-   %e16 = extractelement <4 x float> %res16, i32 3
-   %s1 = fadd float %e1, %e2
-   %s2 = fadd float %s1, %e3
-   %s3 = fadd float %s2, %e4
-   %s4 = fadd float %s3, %e5
-   %s5 = fadd float %s4, %e6
-   %s6 = fadd float %s5, %e7
-   %s7 = fadd float %s6, %e8
-   %s8 = fadd float %s7, %e9
-   %s9 = fadd float %s8, %e10
-   %s10 = fadd float %s9, %e11
-   %s11 = fadd float %s10, %e12
-   %s12 = fadd float %s11, %e13
-   %s13 = fadd float %s12, %e14
-   %s14 = fadd float %s13, %e15
-   %s15 = fadd float %s14, %e16
-   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
-   ret void
-}
-
-declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Index: test/CodeGen/AMDGPU/sgpr-copy.ll
===================================================================
--- test/CodeGen/AMDGPU/sgpr-copy.ll
+++ test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -4,10 +4,14 @@
 ; This test checks that no VGPR to SGPR copies are created by the register
 ; allocator.
 
+
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+
 ; CHECK-LABEL: {{^}}phi1:
 ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
   %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -31,7 +35,7 @@
 
 ; Make sure this program doesn't crash
 ; CHECK-LABEL: {{^}}phi2:
-define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
   %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -50,8 +54,8 @@
   %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
   %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
   %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
-  %tmp36 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %arg2, i32 0
-  %tmp37 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp36, !tbaa !0
+  %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
+  %tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
   %tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
   %tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
   %tmp40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
@@ -63,7 +67,8 @@
   %tmp46 = bitcast float %tmp41 to i32
   %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
   %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
-  %tmp49 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp48, <32 x i8> %tmp37, <16 x i8> %tmp39, i32 2)
+  %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
+  %tmp49 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp48, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp50 = extractelement <4 x float> %tmp49, i32 2
   %tmp51 = call float @fabs(float %tmp50)
   %tmp52 = fmul float %tmp42, %tmp42
@@ -151,7 +156,7 @@
 
 ; We just want ot make sure the program doesn't crash
 ; CHECK-LABEL: {{^}}loop:
-define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
   %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -200,7 +205,7 @@
 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #1
 
 ; Function Attrs: readnone
 declare float @llvm.amdgcn.rsq.f32(float) #3
@@ -223,27 +228,28 @@
 ; CHECK: image_sample
 ; CHECK: exp
 ; CHECK: s_endpgm
-define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
 entry:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
   %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
-  %tmp23 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
-  %tmp24 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp23, !tbaa !0
+  %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
+  %tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
   %tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
   %tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
   %tmp27 = fcmp oeq float %tmp22, 0.000000e+00
+  %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
   br i1 %tmp27, label %if, label %else
 
 if:                                               ; preds = %entry
-  %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> zeroinitializer, <32 x i8> %tmp24, <16 x i8> %tmp26, i32 2)
+  %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %val.if.0 = extractelement <4 x float> %val.if, i32 0
   %val.if.1 = extractelement <4 x float> %val.if, i32 1
   %val.if.2 = extractelement <4 x float> %val.if, i32 2
   br label %endif
 
 else:                                             ; preds = %entry
-  %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %tmp24, <16 x i8> %tmp26, i32 2)
+  %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %val.else.0 = extractelement <4 x float> %val.else, i32 0
   %val.else.1 = extractelement <4 x float> %val.else, i32 1
   %val.else.2 = extractelement <4 x float> %val.else, i32 2
@@ -286,7 +292,7 @@
 ; This test is just checking that we don't crash / assertion fail.
 ; CHECK-LABEL: {{^}}copy2:
 ; CHECK: s_endpgm
-define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
 entry:
   br label %LOOP68
 
@@ -336,9 +342,8 @@
   %tmp53 = bitcast float %tmp30 to i32
   %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
   %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
-  %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8>
-  %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8>
-  %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2)
+  %tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
+  %tmp58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp55, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   br label %bb71
 
 bb80:                                             ; preds = %bb
@@ -347,9 +352,8 @@
   %tmp82.2 = add i32 %tmp82, 1
   %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
   %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
-  %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8>
-  %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8>
-  %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2)
+  %tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
+  %tmp87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   br label %bb71
 
 bb71:                                             ; preds = %bb80, %bb38
Index: test/CodeGen/AMDGPU/si-lod-bias.ll
===================================================================
--- test/CodeGen/AMDGPU/si-lod-bias.ll
+++ test/CodeGen/AMDGPU/si-lod-bias.ll
@@ -6,13 +6,13 @@
 
 ; CHECK: {{^}}main:
 ; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
-define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
   %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
-  %tmp22 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %arg2, i32 0
-  %tmp23 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp22, !tbaa !0
+  %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
+  %tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
   %tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
   %tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0
   %tmp26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
@@ -24,7 +24,8 @@
   %tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
   %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
   %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
-  %tmp35 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %tmp34, <32 x i8> %tmp23, <16 x i8> %tmp25, i32 2)
+  %tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
+  %tmp35 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %tmp34, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp36 = extractelement <4 x float> %tmp35, i32 0
   %tmp37 = extractelement <4 x float> %tmp35, i32 1
   %tmp38 = extractelement <4 x float> %tmp35, i32 2
@@ -39,8 +40,8 @@
 ; Function Attrs: nounwind readnone
 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
 
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
 
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
 
Index: test/CodeGen/AMDGPU/si-scheduler.ll
===================================================================
--- test/CodeGen/AMDGPU/si-scheduler.ll
+++ test/CodeGen/AMDGPU/si-scheduler.ll
@@ -23,7 +23,9 @@
   %tmp28 = bitcast float %tmp26 to i32
   %tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
   %tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
-  %tmp31 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp30, <32 x i8> %tmp22, <16 x i8> %tmp24, i32 2)
+  %tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
+  %tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
+  %tmp31 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp30, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp32 = extractelement <4 x float> %tmp31, i32 0
   %tmp33 = extractelement <4 x float> %tmp31, i32 1
   %tmp34 = extractelement <4 x float> %tmp31, i32 2
@@ -39,8 +41,8 @@
 ; Function Attrs: nounwind readnone
 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
 
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.SI.packf16(float, float) #1
Index: test/CodeGen/AMDGPU/si-sgpr-spill.ll
===================================================================
--- test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -22,7 +22,7 @@
 ; Writing to M0 from an SMRD instruction will hang the GPU.
 ; CHECK-NOT: s_buffer_load_dword m0
 ; CHECK: s_endpgm
-define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
 main_body:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
   %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -64,36 +64,37 @@
   %tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 372)
   %tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 376)
   %tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 384)
-  %tmp60 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
-  %tmp61 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp60, !tbaa !0
+  %tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
+  %tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
   %tmp62 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
   %tmp63 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp62, !tbaa !0
-  %tmp64 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 1
-  %tmp65 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp64, !tbaa !0
+  %tmp63.bc = bitcast <16 x i8> %tmp63 to <4 x i32>
+  %tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
+  %tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
   %tmp66 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
   %tmp67 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp66, !tbaa !0
-  %tmp68 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 2
-  %tmp69 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp68, !tbaa !0
+  %tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
+  %tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
   %tmp70 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
   %tmp71 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp70, !tbaa !0
-  %tmp72 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 3
-  %tmp73 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp72, !tbaa !0
+  %tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
+  %tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
   %tmp74 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
   %tmp75 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp74, !tbaa !0
-  %tmp76 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 4
-  %tmp77 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp76, !tbaa !0
+  %tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
+  %tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
   %tmp78 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
   %tmp79 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp78, !tbaa !0
-  %tmp80 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 5
-  %tmp81 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp80, !tbaa !0
+  %tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
+  %tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
   %tmp82 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
   %tmp83 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp82, !tbaa !0
-  %tmp84 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 6
-  %tmp85 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp84, !tbaa !0
+  %tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
+  %tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
   %tmp86 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
   %tmp87 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp86, !tbaa !0
-  %tmp88 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 7
-  %tmp89 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp88, !tbaa !0
+  %tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
+  %tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
   %tmp90 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
   %tmp91 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp90, !tbaa !0
   %tmp92 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg4, <2 x i32> %arg6)
@@ -272,7 +273,7 @@
   %tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
   %tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
   %tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
-  %tmp243 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp242, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
+  %tmp243 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp242, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp244 = extractelement <4 x float> %tmp243, i32 3
   %tmp245 = fcmp oge float %temp30.0, %tmp244
   %tmp246 = sext i1 %tmp245 to i32
@@ -317,7 +318,8 @@
   %tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
   %tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
   %tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
-  %tmp277 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp276, <32 x i8> %tmp65, <16 x i8> %tmp67, i32 2)
+  %tmp67.bc = bitcast <16 x i8> %tmp67 to <4 x i32>
+  %tmp277 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp276, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp278 = extractelement <4 x float> %tmp277, i32 0
   %tmp279 = extractelement <4 x float> %tmp277, i32 1
   %tmp280 = extractelement <4 x float> %tmp277, i32 2
@@ -337,7 +339,8 @@
   %tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
   %tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
   %tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
-  %tmp297 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp296, <32 x i8> %tmp81, <16 x i8> %tmp83, i32 2)
+  %tmp83.bc = bitcast <16 x i8> %tmp83 to <4 x i32>
+  %tmp297 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp296, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp298 = extractelement <4 x float> %tmp297, i32 0
   %tmp299 = extractelement <4 x float> %tmp297, i32 1
   %tmp300 = extractelement <4 x float> %tmp297, i32 2
@@ -355,7 +358,8 @@
   %tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
   %tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
   %tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
-  %tmp315 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp314, <32 x i8> %tmp77, <16 x i8> %tmp79, i32 2)
+  %tmp79.bc = bitcast <16 x i8> %tmp79 to <4 x i32>
+  %tmp315 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp314, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp316 = extractelement <4 x float> %tmp315, i32 0
   %tmp317 = extractelement <4 x float> %tmp315, i32 1
   %tmp318 = extractelement <4 x float> %tmp315, i32 2
@@ -385,7 +389,7 @@
   %tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
   %tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
   %tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
-  %tmp345 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp344, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
+  %tmp345 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp344, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp346 = extractelement <4 x float> %tmp345, i32 0
   %tmp347 = extractelement <4 x float> %tmp345, i32 1
   %tmp348 = extractelement <4 x float> %tmp345, i32 2
@@ -415,7 +419,8 @@
   %tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
   %tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
   %tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
-  %tmp375 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp374, <32 x i8> %tmp69, <16 x i8> %tmp71, i32 2)
+  %tmp71.bc = bitcast <16 x i8> %tmp71 to <4 x i32>
+  %tmp375 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp374, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp376 = extractelement <4 x float> %tmp375, i32 0
   %tmp377 = extractelement <4 x float> %tmp375, i32 1
   %tmp378 = extractelement <4 x float> %tmp375, i32 2
@@ -469,7 +474,8 @@
   %tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
   %tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
   %tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
-  %tmp429 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp428, <32 x i8> %tmp85, <16 x i8> %tmp87, i32 2)
+  %tmp87.bc = bitcast <16 x i8> %tmp87 to <4 x i32>
+  %tmp429 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp428, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp430 = extractelement <4 x float> %tmp429, i32 0
   %tmp431 = extractelement <4 x float> %tmp429, i32 1
   %tmp432 = extractelement <4 x float> %tmp429, i32 2
@@ -510,7 +516,8 @@
   %tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
   %tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
   %tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
-  %tmp470 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %tmp469, <32 x i8> %tmp89, <16 x i8> %tmp91, i32 4)
+  %tmp91.bc = bitcast <16 x i8> %tmp91 to <4 x i32>
+  %tmp470 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tmp469, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp471 = extractelement <4 x float> %tmp470, i32 0
   %tmp472 = extractelement <4 x float> %tmp470, i32 1
   %tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -611,7 +618,8 @@
   %tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
   %tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
   %tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
-  %tmp571 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp570, <32 x i8> %tmp73, <16 x i8> %tmp75, i32 2)
+  %tmp75.bc = bitcast <16 x i8> %tmp75 to <4 x i32>
+  %tmp571 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp570, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp572 = extractelement <4 x float> %tmp571, i32 0
   %tmp573 = extractelement <4 x float> %tmp571, i32 1
   %tmp574 = extractelement <4 x float> %tmp571, i32 2
@@ -635,7 +643,7 @@
   %tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
   %tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
   %tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
-  %tmp591 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp590, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
+  %tmp591 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp590, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp592 = extractelement <4 x float> %tmp591, i32 3
   %tmp593 = fcmp oge float %temp30.1, %tmp592
   %tmp594 = sext i1 %tmp593 to i32
@@ -660,7 +668,7 @@
 
 ; CHECK-LABEL: {{^}}main1:
 ; CHECK: s_endpgm
-define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
 main_body:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
   %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -767,40 +775,40 @@
   %tmp122 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 716)
   %tmp123 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 864)
   %tmp124 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 868)
-  %tmp125 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
-  %tmp126 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp125, !tbaa !0
+  %tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
+  %tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
   %tmp127 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
   %tmp128 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp127, !tbaa !0
-  %tmp129 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 1
-  %tmp130 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp129, !tbaa !0
+  %tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
+  %tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
   %tmp131 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
   %tmp132 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp131, !tbaa !0
-  %tmp133 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 2
-  %tmp134 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp133, !tbaa !0
+  %tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
+  %tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
   %tmp135 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
   %tmp136 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp135, !tbaa !0
-  %tmp137 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 3
-  %tmp138 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp137, !tbaa !0
+  %tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
+  %tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
   %tmp139 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
   %tmp140 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp139, !tbaa !0
-  %tmp141 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 4
-  %tmp142 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp141, !tbaa !0
+  %tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
+  %tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
   %tmp143 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
   %tmp144 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp143, !tbaa !0
-  %tmp145 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 5
-  %tmp146 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp145, !tbaa !0
+  %tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
+  %tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
   %tmp147 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
   %tmp148 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp147, !tbaa !0
-  %tmp149 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 6
-  %tmp150 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp149, !tbaa !0
+  %tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
+  %tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
   %tmp151 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
   %tmp152 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp151, !tbaa !0
-  %tmp153 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 7
-  %tmp154 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp153, !tbaa !0
+  %tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
+  %tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
   %tmp155 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
   %tmp156 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp155, !tbaa !0
-  %tmp157 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 8
-  %tmp158 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp157, !tbaa !0
+  %tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
+  %tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
   %tmp159 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 8
   %tmp160 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp159, !tbaa !0
   %tmp161 = fcmp ugt float %arg17, 0.000000e+00
@@ -868,7 +876,8 @@
   %tmp222 = bitcast float %tmp174 to i32
   %tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
   %tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
-  %tmp225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp224, <32 x i8> %tmp130, <16 x i8> %tmp132, i32 2)
+  %tmp132.bc = bitcast <16 x i8> %tmp132 to <4 x i32>
+  %tmp225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp224, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp226 = extractelement <4 x float> %tmp225, i32 0
   %tmp227 = extractelement <4 x float> %tmp225, i32 1
   %tmp228 = extractelement <4 x float> %tmp225, i32 2
@@ -938,7 +947,8 @@
   %tmp279 = insertelement <4 x i32> %tmp278, i32 %tmp277, i32 1
   %tmp280 = insertelement <4 x i32> %tmp279, i32 0, i32 2
   %tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
-  %tmp282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp281, <32 x i8> %tmp146, <16 x i8> %tmp148, i32 2)
+  %tmp148.bc = bitcast <16 x i8> %tmp148 to <4 x i32>
+  %tmp282 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp281, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp283 = extractelement <4 x float> %tmp282, i32 3
   %tmp284 = fadd float %temp168.0, %tmp273
   %tmp285 = fadd float %temp169.0, %tmp274
@@ -1001,7 +1011,8 @@
   %tmp339 = bitcast float %tmp335 to i32
   %tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
   %tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
-  %tmp342 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp341, <32 x i8> %tmp134, <16 x i8> %tmp136, i32 2)
+  %tmp136.bc = bitcast <16 x i8> %tmp136 to <4 x i32>
+  %tmp342 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp341, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp343 = extractelement <4 x float> %tmp342, i32 0
   %tmp344 = extractelement <4 x float> %tmp342, i32 1
   %tmp345 = extractelement <4 x float> %tmp342, i32 2
@@ -1033,7 +1044,8 @@
   %tmp359 = bitcast float %tmp337 to i32
   %tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
   %tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
-  %tmp362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp361, <32 x i8> %tmp150, <16 x i8> %tmp152, i32 2)
+  %tmp152.bc = bitcast <16 x i8> %tmp152 to <4 x i32>
+  %tmp362 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp361, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp363 = extractelement <4 x float> %tmp362, i32 2
   %tmp364 = fmul float %result.i40, %result.i
   %tmp365 = fmul float %result.i36, %result.i44
@@ -1043,7 +1055,8 @@
   %tmp369 = bitcast float %tmp311 to i32
   %tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
   %tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
-  %tmp372 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp371, <32 x i8> %tmp138, <16 x i8> %tmp140, i32 2)
+  %tmp140.bc = bitcast <16 x i8> %tmp140 to <4 x i32>
+  %tmp372 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp371, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp373 = extractelement <4 x float> %tmp372, i32 0
   %tmp374 = extractelement <4 x float> %tmp372, i32 1
   %tmp375 = extractelement <4 x float> %tmp372, i32 2
@@ -1059,7 +1072,8 @@
   %tmp383 = bitcast float %tmp321 to i32
   %tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
   %tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
-  %tmp386 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp385, <32 x i8> %tmp142, <16 x i8> %tmp144, i32 2)
+  %tmp144.bc = bitcast <16 x i8> %tmp144 to <4 x i32>
+  %tmp386 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp385, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp387 = extractelement <4 x float> %tmp386, i32 0
   %tmp388 = extractelement <4 x float> %tmp386, i32 1
   %tmp389 = extractelement <4 x float> %tmp386, i32 2
@@ -1155,7 +1169,8 @@
   %tmp467 = bitcast float %tmp220 to i32
   %tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
   %tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
-  %tmp470 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp469, <32 x i8> %tmp158, <16 x i8> %tmp160, i32 2)
+  %tmp160.bc = bitcast <16 x i8> %tmp160 to <4 x i32>
+  %tmp470 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp469, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp471 = extractelement <4 x float> %tmp470, i32 0
   %tmp472 = extractelement <4 x float> %tmp470, i32 1
   %tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -1172,7 +1187,8 @@
   %tmp484 = bitcast float %tmp172 to i32
   %tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
   %tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
-  %tmp487 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp486, <32 x i8> %tmp154, <16 x i8> %tmp156, i32 2)
+  %tmp156.bc = bitcast <16 x i8> %tmp156 to <4 x i32>
+  %tmp487 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp486, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp488 = extractelement <4 x float> %tmp487, i32 0
   %tmp489 = extractelement <4 x float> %tmp487, i32 1
   %tmp490 = extractelement <4 x float> %tmp487, i32 2
@@ -1377,7 +1393,8 @@
   %tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
   %tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
   %tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
-  %tmp660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp659, <32 x i8> %tmp126, <16 x i8> %tmp128, i32 2)
+  %tmp128.bc = bitcast <16 x i8> %tmp128 to <4 x i32>
+  %tmp660 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp659, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp661 = extractelement <4 x float> %tmp660, i32 0
   %tmp662 = extractelement <4 x float> %tmp660, i32 1
   %tmp663 = bitcast float %tmp646 to i32
@@ -1387,7 +1404,7 @@
   %tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
   %tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
   %tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
-  %tmp670 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp669, <32 x i8> %tmp126, <16 x i8> %tmp128, i32 2)
+  %tmp670 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp669, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp671 = extractelement <4 x float> %tmp670, i32 0
   %tmp672 = extractelement <4 x float> %tmp670, i32 1
   %tmp673 = fsub float -0.000000e+00, %tmp662
@@ -1549,10 +1566,11 @@
 declare float @llvm.AMDIL.clamp.(float, float, float) #1
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
+
 
 ; Function Attrs: readnone
 declare float @llvm.AMDIL.exp.(float) #1
@@ -1573,7 +1591,7 @@
 declare float @llvm.amdgcn.rsq.f32(float) #2
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
 
 ; Function Attrs: readnone
 declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
@@ -1582,7 +1600,8 @@
 declare float @fabs(float) #1
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
+
 
 ; Function Attrs: nounwind readnone
 declare float @llvm.pow.f32(float, float) #2
Index: test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
===================================================================
--- test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -67,7 +67,7 @@
   br label %bb4
 
 bb9:                                              ; preds = %bb2
-  %tmp10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 2)
+  %tmp10 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %tmp11 = extractelement <4 x float> %tmp10, i32 1
   %tmp12 = extractelement <4 x float> %tmp10, i32 3
   br label %bb14
@@ -98,7 +98,7 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.SI.packf16(float, float) #1