Index: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll =================================================================== --- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,105 +1,105 @@ ;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32( define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32( define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32( define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32( define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32( define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32( define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32( define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32( define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32( define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32( define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32( define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32( define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0) + %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } @@ -112,19 +112,19 @@ ret float %r } -declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0 +declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 Index: llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll +++ llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll @@ -1,13 +1,13 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain: -; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 +; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 ; GCN-NOT: v1 ; GCN-NOT: v0 ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -17,13 +17,13 @@ } ; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain: -; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 +; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 ; GCN-NOT: v1 ; GCN-NOT: v0 ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -33,13 +33,13 @@ } ; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain: -; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 +; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 ; GCN-NOT: v1 ; GCN-NOT: v0 ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -49,13 +49,13 @@ } ; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain: -; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 +; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 ; GCN-NOT: v1 ; GCN-NOT: v0 ; GCN: buffer_store_dword v0 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 { main_body: - %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp1 = bitcast <2 x float> %tmp to <2 x i32> %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -66,7 +66,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 { main_body: - %tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> @@ -76,9 +76,9 @@ } -declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 -declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } Index: llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll +++ llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll @@ -7,7 +7,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 { bb: %tmp = fptosi float %arg0 to i32 - %tmp1 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false) + %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0) %tmp2.f = extractelement <4 x float> %tmp1, i32 0 %tmp2 = bitcast float %tmp2.f to i32 %tmp3 = and i32 %tmp, 7 @@ -21,7 +21,7 @@ } declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll +++ llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll @@ -204,7 +204,7 @@ ; GCN: v_readfirstlane_b32 ; GCN-NEXT: v_readfirstlane_b32 ; SI: s_nop -; GCN-NEXT: s_load_dwordx8 +; GCN: s_load_dwordx8 ; GCN-NEXT: s_load_dwordx4 ; GCN: image_sample define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 { @@ -219,7 +219,7 @@ %29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)* %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0 %31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0 - %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8 + %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8 %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 @@ -238,7 +238,7 @@ ; GCN: v_readfirstlane_b32 ; GCN-NEXT: v_readfirstlane_b32 ; SI: s_nop -; GCN-NEXT: s_load_dwordx8 +; GCN: s_load_dwordx8 ; GCN-NEXT: s_load_dwordx4 ; GCN: image_sample define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 { @@ -253,7 +253,7 @@ %29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)* %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28 %31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0 - %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8 + %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8 %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 @@ -272,7 +272,7 @@ declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6 ; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7 !0 = !{} Index: llvm/trunk/test/CodeGen/AMDGPU/else.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/else.ll +++ llvm/trunk/test/CodeGen/AMDGPU/else.ll @@ -44,7 +44,7 @@ else: %c = fmul float %v, 3.0 - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %v.else = extractelement <4 x float> %tex, i32 0 br label %end @@ -55,7 +55,7 @@ } declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 attributes #0 = { nounwind } attributes #1 = { nounwind writeonly } Index: llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll +++ llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll @@ -25,17 +25,18 @@ %tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32 %tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0 %tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16 - %tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> - %tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + %tmp13.0 = extractelement <3 x i32> %tmp9, i32 0 + %tmp13.1 = extractelement <3 x i32> %tmp9, i32 1 + %tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) #0 %tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)* %tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16 - call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) #0 %tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16 - %tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + %tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 165, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) #0 %tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64 %tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0 %tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16 - call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) #0 ret void } @@ -43,10 +44,10 @@ declare i64 @llvm.amdgcn.s.getpc() #1 ; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2 ; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3 +declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3 attributes #0 = { nounwind } attributes #1 = { nounwind readnone speculatable } Index: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -75,10 +75,9 @@ ; GCN-LABEL: {{^}}insertelement_to_sgpr: ; GCN-NOT: v_readfirstlane -define amdgpu_ps <4 x float> @insertelement_to_sgpr() nounwind { - %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef - %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0 - %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true) +define amdgpu_ps <4 x float> @insertelement_to_sgpr(<4 x i32> inreg %samp) nounwind { + %tmp1 = insertelement <4 x i32> %samp, i32 0, i32 0 + %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0) ret <4 x float> %tmp2 } @@ -474,7 +473,7 @@ ret void } -declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll @@ -1,3 +1,4 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}atomic_swap_1d: Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}load_1d: ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} @@ -370,6 +370,46 @@ ret void } +; GCN-LABEL: {{^}}getresinfo_dmask0: +; GCN-NOT: image +; GCN: ; return to shader part epilog +define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %r +} + +; Ideally, the register allocator would avoid the wait here +; +; GCN-LABEL: {{^}}image_store_wait: +; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm +; SI: s_waitcnt expcnt(0) +; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm +; GCN: s_waitcnt vmcnt(0) +; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm +define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 { +main_body: + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0) + ret void +} + +; SI won't merge ds memory operations, because of the signed offset bug, so +; we only have check lines for VI. +; VI-LABEL: image_load_mmo +; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 +; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4 +define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 { + store float 0.000000e+00, float addrspace(3)* %lds + %c0 = extractelement <2 x i32> %c, i32 0 + %c1 = extractelement <2 x i32> %c, i32 1 + %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0) + %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4 + store float 0.000000e+00, float addrspace(3)* %tmp2 + ret float %tex +} + declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 @@ -412,6 +452,7 @@ declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2 declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0 Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll @@ -1,3 +1,4 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}gather4_2d: Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll @@ -0,0 +1,118 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}gather4_o_2d: +; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_c_o_2d: +; GCN: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_cl_o_2d: +; GCN: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_c_cl_o_2d: +; GCN: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_b_o_2d: +; GCN: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_c_b_o_2d: +; GCN: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_b_cl_o_2d: +; GCN: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_c_b_cl_o_2d: +; GCN: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_l_o_2d: +; GCN: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_c_l_o_2d: +; GCN: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_lz_o_2d: +; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}gather4_c_lz_o_2d: +; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -1,3 +1,4 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}sample_1d: @@ -400,6 +401,95 @@ ret <4 x float> %v } +; GCN-LABEL: {{^}}adjust_writemask_sample_0: +; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}} +define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %elt0 = extractelement <4 x float> %r, i32 0 + ret float %elt0 +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_01 +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}} +define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> + ret <2 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_012 +; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}} +define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> + ret <3 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_12 +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}} +define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> + ret <2 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_03 +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}} +define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> + ret <2 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_13 +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}} +define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> + ret <2 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_123 +; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}} +define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> + ret <3 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled +; GCN-NOT: image +define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %r +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12 +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}} +define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> + ret <2 x float> %out +} + +; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13 +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}} +define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> + ret <2 x float> %out +} + declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll @@ -0,0 +1,371 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}sample_o_1d: +; GCN: image_sample_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_o_2d: +; GCN: image_sample_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_o_1d: +; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_o_2d: +; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_cl_o_1d: +; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_cl_o_2d: +; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_cl_o_1d: +; GCN: image_sample_c_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_cl_o_2d: +; GCN: image_sample_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_b_o_1d: +; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_b_o_2d: +; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_b_o_1d: +; GCN: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_b_o_2d: +; GCN: image_sample_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_b_cl_o_1d: +; GCN: image_sample_b_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_b_cl_o_2d: +; GCN: image_sample_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_b_cl_o_1d: +; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_b_cl_o_2d: +; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_d_o_1d: +; GCN: image_sample_d_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_d_o_2d: +; GCN: image_sample_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_d_o_1d: +; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_d_o_2d: +; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_d_cl_o_1d: +; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_d_cl_o_2d: +; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_d_cl_o_1d: +; GCN: image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_d_cl_o_2d: +; GCN: image_sample_c_d_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_cd_o_1d: +; GCN: image_sample_cd_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_cd_o_2d: +; GCN: image_sample_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_cd_o_1d: +; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_cd_o_2d: +; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_cd_cl_o_1d: +; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_cd_cl_o_2d: +; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_cd_cl_o_1d: +; GCN: image_sample_c_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d: +; GCN: image_sample_c_cd_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_l_o_1d: +; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_l_o_2d: +; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_l_o_1d: +; GCN: image_sample_c_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_l_o_2d: +; GCN: image_sample_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_lz_o_1d: +; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_lz_o_2d: +; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_lz_o_1d: +; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_c_lz_o_2d: +; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +define amdgpu_ps <4 x float> @sample_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll @@ -26,7 +26,7 @@ %live = call i1 @llvm.amdgcn.ps.live() %live.32 = zext i1 %live to i32 %live.32.bc = bitcast i32 %live.32 to float - %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %r = extractelement <4 x float> %t, i32 0 ret float %r } @@ -49,13 +49,13 @@ end: %tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ] %tc.bc = bitcast i32 %tc to float - %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 %r = extractelement <4 x float> %t, i32 0 ret float %r } declare i1 @llvm.amdgcn.ps.live() #1 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -8,9 +8,9 @@ ; CHECK-NEXT: image_store ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) { - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d0, i32 15, i32 %c0, <8 x i32> %rsrc, i32 0, i32 0) call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00 - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d1, i32 15, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -24,17 +24,17 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}} ; CHECK-NEXT: image_store define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) { - %t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %t = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0) call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00 %c.1 = mul i32 %c, 2 - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %t, i32 15, i32 %c.1, <8 x i32> %rsrc, i32 0, i32 0) ret void } declare void @llvm.amdgcn.s.waitcnt(i32) #0 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } Index: llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll +++ llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll @@ -10,8 +10,8 @@ define dllexport amdgpu_ps <2 x float> @_amdgpu_ps_main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, <2 x float>, <2 x float>, <2 x float>, <3 x float>, <2 x float>, <2 x float>, <2 x float>, float, float, float, float, float, i32, i32, i32, i32) local_unnamed_addr { .entry: - %res = call <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %res = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) ret <2 x float> %res } -declare <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) Index: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll +++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -81,13 +81,8 @@ %j.f.i4 = bitcast i32 %j.i2 to float %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1 - %tmp45 = bitcast float %p2.i to i32 - %tmp46 = bitcast float %p2.i24 to i32 - %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0 - %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1 %tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32> - %a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float> - %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i24, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i1 0, i32 0, i32 0) %tmp50 = extractelement <4 x float> %tmp1, i32 2 %tmp51 = call float @llvm.fabs.f32(float %tmp50) %tmp52 = fmul float %p2.i18, %p2.i18 @@ -240,14 +235,14 @@ br i1 %tmp27, label %if, label %else if: ; preds = %entry - %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> , <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36D6000000000000, float 0x36DA000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0) %val.if.0 = extractelement <4 x float> %tmp1, i32 0 %val.if.1 = extractelement <4 x float> %tmp1, i32 1 %val.if.2 = extractelement <4 x float> %tmp1, i32 2 br label %endif else: ; preds = %entry - %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> , <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36C4000000000000, float 0x36CC000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0) %val.else.0 = extractelement <4 x float> %tmp2, i32 0 %val.else.1 = extractelement <4 x float> %tmp2, i32 1 %val.else.2 = extractelement <4 x float> %tmp2, i32 2 @@ -352,24 +347,18 @@ br i1 %tmp36, label %bb38, label %bb80 bb38: ; preds = %bb - %tmp52 = bitcast float %p2.i to i32 - %tmp53 = bitcast float %p2.i6 to i32 - %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0 - %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1 %tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32> - %a.bc.i = bitcast <2 x i32> %tmp55 to <2 x float> - %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp56, <4 x i32> %tmp28, i1 0, i32 0, i32 0) br label %bb71 bb80: ; preds = %bb %tmp81 = bitcast float %p2.i to i32 %tmp82 = bitcast float %p2.i6 to i32 %tmp82.2 = add i32 %tmp82, 1 - %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0 - %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1 + %tmp83 = bitcast i32 %tmp81 to float + %tmp84 = bitcast i32 %tmp82.2 to float %tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32> - %a.bc.i1 = bitcast <2 x i32> %tmp84 to <2 x float> - %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp83, float %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i1 0, i32 0, i32 0) br label %bb71 bb71: ; preds = %bb80, %bb38 @@ -387,7 +376,7 @@ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid %tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0 - %tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> , <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp10 = extractelement <4 x float> %tmp, i32 0 %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10) call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0 @@ -402,7 +391,7 @@ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid %tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0 - %tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> , <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0) %tmp10 = extractelement <4 x float> %tmp, i32 0 %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef) call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0 @@ -419,7 +408,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll +++ llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll @@ -34,14 +34,9 @@ %j.f.i4 = bitcast i32 %j.i2 to float %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1 - %tmp27 = bitcast float %p2.i to i32 - %tmp28 = bitcast float %p2.i6 to i32 - %tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0 - %tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1 %tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32> %tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32> - %tmp30.bc = bitcast <2 x i32> %tmp30 to <2 x float> - %tmp31 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp30.bc, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp31 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i1 0, i32 0, i32 0) %tmp32 = extractelement <4 x float> %tmp31, i32 0 %tmp33 = extractelement <4 x float> %tmp31, i32 1 @@ -57,7 +52,7 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -324,14 +324,6 @@ %tmp213 = fmul float %tmp205, %tmp191 %tmp214 = fmul float %tmp206, %tmp191 %tmp215 = fmul float -1.000000e+00, %tmp191 - %tmp216 = bitcast float %tmp135 to i32 - %tmp217 = bitcast float %tmp181 to i32 - %tmp218 = bitcast float %tmp136 to i32 - %tmp219 = bitcast float %tmp182 to i32 - %tmp220 = insertelement <8 x i32> undef, i32 %tmp216, i32 0 - %tmp221 = insertelement <8 x i32> %tmp220, i32 %tmp217, i32 1 - %tmp222 = insertelement <8 x i32> %tmp221, i32 %tmp218, i32 2 - %tmp223 = insertelement <8 x i32> %tmp222, i32 %tmp219, i32 3 br label %LOOP LOOP: ; preds = %ENDIF, %main_body @@ -358,14 +350,7 @@ br label %LOOP65 ENDIF: ; preds = %LOOP - %tmp237 = bitcast float %temp28.0 to i32 - %tmp238 = bitcast float %temp29.0 to i32 - %tmp239 = insertelement <8 x i32> %tmp223, i32 %tmp237, i32 4 - %tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5 - %tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6 - %tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7 - %tmp242.bc = bitcast <8 x i32> %tmp242 to <8 x float> - %tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp242.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.0, float %temp29.0, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0) %tmp244 = extractelement <4 x float> %tmp243, i32 3 %tmp245 = fcmp oge float %temp30.0, %tmp244 %tmp246 = sext i1 %tmp245 to i32 @@ -396,65 +381,20 @@ br i1 %tmp262, label %IF67, label %ENDIF66 IF67: ; preds = %LOOP65 - %tmp263 = bitcast float %tmp135 to i32 - %tmp264 = bitcast float %tmp181 to i32 - %tmp265 = bitcast float %tmp136 to i32 - %tmp266 = bitcast float %tmp182 to i32 - %tmp267 = bitcast float %temp28.1 to i32 - %tmp268 = bitcast float %temp29.1 to i32 - %tmp269 = insertelement <8 x i32> undef, i32 %tmp263, i32 0 - %tmp270 = insertelement <8 x i32> %tmp269, i32 %tmp264, i32 1 - %tmp271 = insertelement <8 x i32> %tmp270, i32 %tmp265, i32 2 - %tmp272 = insertelement <8 x i32> %tmp271, i32 %tmp266, i32 3 - %tmp273 = insertelement <8 x i32> %tmp272, i32 %tmp267, i32 4 - %tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5 - %tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6 - %tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7 %tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32> - %tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float> - %tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i1 0, i32 0, i32 0) %tmp278 = extractelement <4 x float> %tmp277, i32 0 %tmp279 = extractelement <4 x float> %tmp277, i32 1 %tmp280 = extractelement <4 x float> %tmp277, i32 2 %tmp281 = extractelement <4 x float> %tmp277, i32 3 %tmp282 = fmul float %tmp281, %tmp46 - %tmp283 = bitcast float %tmp135 to i32 - %tmp284 = bitcast float %tmp181 to i32 - %tmp285 = bitcast float %tmp136 to i32 - %tmp286 = bitcast float %tmp182 to i32 - %tmp287 = bitcast float %temp28.1 to i32 - %tmp288 = bitcast float %temp29.1 to i32 - %tmp289 = insertelement <8 x i32> undef, i32 %tmp283, i32 0 - %tmp290 = insertelement <8 x i32> %tmp289, i32 %tmp284, i32 1 - %tmp291 = insertelement <8 x i32> %tmp290, i32 %tmp285, i32 2 - %tmp292 = insertelement <8 x i32> %tmp291, i32 %tmp286, i32 3 - %tmp293 = insertelement <8 x i32> %tmp292, i32 %tmp287, i32 4 - %tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5 - %tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6 - %tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7 %tmp83.bc = bitcast <4 x i32> %tmp83 to <4 x i32> - %tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float> - %tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i1 0, i32 0, i32 0) %tmp298 = extractelement <4 x float> %tmp297, i32 0 %tmp299 = extractelement <4 x float> %tmp297, i32 1 %tmp300 = extractelement <4 x float> %tmp297, i32 2 - %tmp301 = bitcast float %tmp135 to i32 - %tmp302 = bitcast float %tmp181 to i32 - %tmp303 = bitcast float %tmp136 to i32 - %tmp304 = bitcast float %tmp182 to i32 - %tmp305 = bitcast float %temp28.1 to i32 - %tmp306 = bitcast float %temp29.1 to i32 - %tmp307 = insertelement <8 x i32> undef, i32 %tmp301, i32 0 - %tmp308 = insertelement <8 x i32> %tmp307, i32 %tmp302, i32 1 - %tmp309 = insertelement <8 x i32> %tmp308, i32 %tmp303, i32 2 - %tmp310 = insertelement <8 x i32> %tmp309, i32 %tmp304, i32 3 - %tmp311 = insertelement <8 x i32> %tmp310, i32 %tmp305, i32 4 - %tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5 - %tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6 - %tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7 %tmp79.bc = bitcast <4 x i32> %tmp79 to <4 x i32> - %tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float> - %tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i1 0, i32 0, i32 0) %tmp316 = extractelement <4 x float> %tmp315, i32 0 %tmp317 = extractelement <4 x float> %tmp315, i32 1 %tmp318 = extractelement <4 x float> %tmp315, i32 2 @@ -470,22 +410,7 @@ %tmp328 = fadd float %tmp278, %tmp323 %tmp329 = fadd float %tmp279, %tmp325 %tmp330 = fadd float %tmp280, %tmp327 - %tmp331 = bitcast float %tmp135 to i32 - %tmp332 = bitcast float %tmp181 to i32 - %tmp333 = bitcast float %tmp136 to i32 - %tmp334 = bitcast float %tmp182 to i32 - %tmp335 = bitcast float %temp28.1 to i32 - %tmp336 = bitcast float %temp29.1 to i32 - %tmp337 = insertelement <8 x i32> undef, i32 %tmp331, i32 0 - %tmp338 = insertelement <8 x i32> %tmp337, i32 %tmp332, i32 1 - %tmp339 = insertelement <8 x i32> %tmp338, i32 %tmp333, i32 2 - %tmp340 = insertelement <8 x i32> %tmp339, i32 %tmp334, i32 3 - %tmp341 = insertelement <8 x i32> %tmp340, i32 %tmp335, i32 4 - %tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5 - %tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6 - %tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7 - %tmp344.bc = bitcast <8 x i32> %tmp344 to <8 x float> - %tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp344.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0) %tmp346 = extractelement <4 x float> %tmp345, i32 0 %tmp347 = extractelement <4 x float> %tmp345, i32 1 %tmp348 = extractelement <4 x float> %tmp345, i32 2 @@ -501,23 +426,8 @@ %tmp358 = fmul float %tmp349, %tmp357 %tmp359 = fmul float %tmp350, %tmp357 %tmp360 = fmul float %tmp351, %tmp357 - %tmp361 = bitcast float %tmp135 to i32 - %tmp362 = bitcast float %tmp181 to i32 - %tmp363 = bitcast float %tmp136 to i32 - %tmp364 = bitcast float %tmp182 to i32 - %tmp365 = bitcast float %temp28.1 to i32 - %tmp366 = bitcast float %temp29.1 to i32 - %tmp367 = insertelement <8 x i32> undef, i32 %tmp361, i32 0 - %tmp368 = insertelement <8 x i32> %tmp367, i32 %tmp362, i32 1 - %tmp369 = insertelement <8 x i32> %tmp368, i32 %tmp363, i32 2 - %tmp370 = insertelement <8 x i32> %tmp369, i32 %tmp364, i32 3 - %tmp371 = insertelement <8 x i32> %tmp370, i32 %tmp365, i32 4 - %tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5 - %tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6 - %tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7 %tmp71.bc = bitcast <4 x i32> %tmp71 to <4 x i32> - %tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float> - %tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i1 0, i32 0, i32 0) %tmp376 = extractelement <4 x float> %tmp375, i32 0 %tmp377 = extractelement <4 x float> %tmp375, i32 1 %tmp378 = extractelement <4 x float> %tmp375, i32 2 @@ -557,23 +467,8 @@ %tmp412 = fadd float %tmp411, %tmp406 %tmp413 = fmul float %tmp399, %p2.i24 %tmp414 = fadd float %tmp413, %tmp408 - %tmp415 = bitcast float %tmp135 to i32 - %tmp416 = bitcast float %tmp181 to i32 - %tmp417 = bitcast float %tmp136 to i32 - %tmp418 = bitcast float %tmp182 to i32 - %tmp419 = bitcast float %temp28.1 to i32 - %tmp420 = bitcast float %temp29.1 to i32 - %tmp421 = insertelement <8 x i32> undef, i32 %tmp415, i32 0 - %tmp422 = insertelement <8 x i32> %tmp421, i32 %tmp416, i32 1 - %tmp423 = insertelement <8 x i32> %tmp422, i32 %tmp417, i32 2 - %tmp424 = insertelement <8 x i32> %tmp423, i32 %tmp418, i32 3 - %tmp425 = insertelement <8 x i32> %tmp424, i32 %tmp419, i32 4 - %tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5 - %tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6 - %tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7 %tmp87.bc = bitcast <4 x i32> %tmp87 to <4 x i32> - %tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float> - %tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i1 0, i32 0, i32 0) %tmp430 = extractelement <4 x float> %tmp429, i32 0 %tmp431 = extractelement <4 x float> %tmp429, i32 1 %tmp432 = extractelement <4 x float> %tmp429, i32 2 @@ -617,16 +512,8 @@ %tmp460 = fadd float %tmp459, 1.500000e+00 %tmp461 = fmul float %tmp454, %tmp458 %tmp462 = fadd float %tmp461, 1.500000e+00 - %tmp463 = bitcast float %tmp462 to i32 - %tmp464 = bitcast float %tmp460 to i32 - %tmp465 = bitcast float %tmp456 to i32 - %tmp466 = insertelement <4 x i32> undef, i32 %tmp463, i32 0 - %tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1 - %tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2 - %tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3 %tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32> - %tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float> - %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %tmp462, float %tmp460, float %tmp456, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i1 0, i32 0, i32 0) #0 %tmp471 = extractelement <4 x float> %tmp470, i32 0 %tmp472 = extractelement <4 x float> %tmp470, i32 1 %tmp473 = extractelement <4 x float> %tmp470, i32 2 @@ -713,23 +600,8 @@ %tmp554 = fadd float %tmp553, %tmp549 %tmp555 = fmul float %tmp547, %tmp58 %tmp556 = fadd float %tmp555, %tmp550 - %tmp557 = bitcast float %tmp135 to i32 - %tmp558 = bitcast float %tmp181 to i32 - %tmp559 = bitcast float %tmp136 to i32 - %tmp560 = bitcast float %tmp182 to i32 - %tmp561 = bitcast float %temp28.1 to i32 - %tmp562 = bitcast float %temp29.1 to i32 - %tmp563 = insertelement <8 x i32> undef, i32 %tmp557, i32 0 - %tmp564 = insertelement <8 x i32> %tmp563, i32 %tmp558, i32 1 - %tmp565 = insertelement <8 x i32> %tmp564, i32 %tmp559, i32 2 - %tmp566 = insertelement <8 x i32> %tmp565, i32 %tmp560, i32 3 - %tmp567 = insertelement <8 x i32> %tmp566, i32 %tmp561, i32 4 - %tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5 - %tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6 - %tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7 %tmp75.bc = bitcast <4 x i32> %tmp75 to <4 x i32> - %tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float> - %tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i1 0, i32 0, i32 0) %tmp572 = extractelement <4 x float> %tmp571, i32 0 %tmp573 = extractelement <4 x float> %tmp571, i32 1 %tmp574 = extractelement <4 x float> %tmp571, i32 2 @@ -745,14 +617,7 @@ ret void ENDIF66: ; preds = %LOOP65 - %tmp585 = bitcast float %temp28.1 to i32 - %tmp586 = bitcast float %temp29.1 to i32 - %tmp587 = insertelement <8 x i32> %tmp236, i32 %tmp585, i32 4 - %tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5 - %tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6 - %tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7 - %tmp590.bc = bitcast <8 x i32> %tmp590 to <8 x float> - %tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp590.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0) %tmp592 = extractelement <4 x float> %tmp591, i32 3 %tmp593 = fcmp oge float %temp30.1, %tmp592 %tmp594 = sext i1 %tmp593 to i32 @@ -1140,13 +1005,8 @@ %tmp218 = fmul float %., %tmp53 %tmp219 = fmul float %arg13, %tmp46 %tmp220 = fmul float %tmp196, %tmp47 - %tmp221 = bitcast float %p2.i132 to i32 - %tmp222 = bitcast float %p2.i126 to i32 - %tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0 - %tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1 %tmp132.bc = bitcast <4 x i32> %tmp132 to <4 x i32> - %tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float> - %tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp225 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i132, float %p2.i126, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i1 0, i32 0, i32 0) %tmp226 = extractelement <4 x float> %tmp225, i32 0 %tmp227 = extractelement <4 x float> %tmp225, i32 1 %tmp228 = extractelement <4 x float> %tmp225, i32 2 @@ -1220,7 +1080,7 @@ %tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3 %tmp148.bc = bitcast <4 x i32> %tmp148 to <4 x i32> %tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float> - %tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %temp168.0, float %temp169.0, float 0.0, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i1 0, i32 0, i32 0) %tmp283 = extractelement <4 x float> %tmp282, i32 3 %tmp284 = fadd float %temp168.0, %tmp273 %tmp285 = fadd float %temp169.0, %tmp274 @@ -1279,13 +1139,8 @@ %tmp335 = fadd float %p2.i162, %tmp329 %tmp336 = fadd float %p2.i156, %tmp331 %tmp337 = fadd float %p2.i150, %tmp333 - %tmp338 = bitcast float %tmp334 to i32 - %tmp339 = bitcast float %tmp335 to i32 - %tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0 - %tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1 %tmp136.bc = bitcast <4 x i32> %tmp136 to <4 x i32> - %a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float> - %tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp0 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp334, float %tmp335, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i1 0, i32 0, i32 0) %tmp343 = extractelement <4 x float> %tmp0, i32 0 %tmp344 = extractelement <4 x float> %tmp0, i32 1 %tmp345 = extractelement <4 x float> %tmp0, i32 2 @@ -1313,25 +1168,15 @@ %one.sub.ac.i30 = fmul float %one.sub.a.i29, %tmp353 %mul.i31 = fmul float %tmp345, %tmp353 %result.i32 = fadd float %mul.i31, %one.sub.ac.i30 - %tmp358 = bitcast float %tmp336 to i32 - %tmp359 = bitcast float %tmp337 to i32 - %tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0 - %tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1 %tmp152.bc = bitcast <4 x i32> %tmp152 to <4 x i32> - %a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float> - %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp336, float %tmp337, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i1 0, i32 0, i32 0) %tmp363 = extractelement <4 x float> %tmp1, i32 2 %tmp364 = fmul float %result.i40, %result.i %tmp365 = fmul float %result.i36, %result.i44 %tmp366 = fmul float %result.i32, %result.i42 %tmp367 = fmul float %tmp354, %tmp229 - %tmp368 = bitcast float %tmp310 to i32 - %tmp369 = bitcast float %tmp311 to i32 - %tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0 - %tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1 %tmp140.bc = bitcast <4 x i32> %tmp140 to <4 x i32> - %a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float> - %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp310, float %tmp311, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i1 0, i32 0, i32 0) %tmp373 = extractelement <4 x float> %tmp2, i32 0 %tmp374 = extractelement <4 x float> %tmp2, i32 1 %tmp375 = extractelement <4 x float> %tmp2, i32 2 @@ -1343,13 +1188,8 @@ %tmp381 = icmp ne i32 %tmp380, 0 %.224 = select i1 %tmp381, float %tmp374, float %tmp373 %.225 = select i1 %tmp381, float %tmp376, float %tmp374 - %tmp382 = bitcast float %tmp320 to i32 - %tmp383 = bitcast float %tmp321 to i32 - %tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0 - %tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1 %tmp144.bc = bitcast <4 x i32> %tmp144 to <4 x i32> - %a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float> - %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp320, float %tmp321, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i1 0, i32 0, i32 0) %tmp387 = extractelement <4 x float> %tmp3, i32 0 %tmp388 = extractelement <4 x float> %tmp3, i32 1 %tmp389 = extractelement <4 x float> %tmp3, i32 2 @@ -1442,13 +1282,8 @@ %temp14.0 = phi float [ %tmp465, %IF198 ], [ %tmp457, %IF189 ] %temp13.0 = phi float [ %tmp464, %IF198 ], [ %tmp456, %IF189 ] %temp12.0 = phi float [ %tmp463, %IF198 ], [ %tmp455, %IF189 ] - %tmp466 = bitcast float %tmp219 to i32 - %tmp467 = bitcast float %tmp220 to i32 - %tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0 - %tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1 %tmp160.bc = bitcast <4 x i32> %tmp160 to <4 x i32> - %tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float> - %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp219, float %tmp220, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i1 0, i32 0, i32 0) %tmp471 = extractelement <4 x float> %tmp470, i32 0 %tmp472 = extractelement <4 x float> %tmp470, i32 1 %tmp473 = extractelement <4 x float> %tmp470, i32 2 @@ -1461,13 +1296,8 @@ %tmp480 = fadd float %tmp479, %tmp40 %tmp481 = fmul float %tmp474, %tmp41 %tmp482 = fadd float %tmp481, %tmp42 - %tmp483 = bitcast float %p2.i144 to i32 - %tmp484 = bitcast float %p2.i138 to i32 - %tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0 - %tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1 %tmp156.bc = bitcast <4 x i32> %tmp156 to <4 x i32> - %tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float> - %tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp487 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i144, float %p2.i138, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i1 0, i32 0, i32 0) %tmp488 = extractelement <4 x float> %tmp487, i32 0 %tmp489 = extractelement <4 x float> %tmp487, i32 1 %tmp490 = extractelement <4 x float> %tmp487, i32 2 @@ -1667,27 +1497,11 @@ %tmp651 = fadd float %tmp650, 1.000000e+00 %max.0.i11 = call float @llvm.maxnum.f32(float %tmp651, float 0.000000e+00) %clamp.i12 = call float @llvm.minnum.f32(float %max.0.i11, float 1.000000e+00) - %tmp653 = bitcast float %tmp642 to i32 - %tmp654 = bitcast float %tmp644 to i32 - %tmp655 = bitcast float 0.000000e+00 to i32 - %tmp656 = insertelement <4 x i32> undef, i32 %tmp653, i32 0 - %tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1 - %tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2 - %tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3 %tmp128.bc = bitcast <4 x i32> %tmp128 to <4 x i32> - %tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float> - %tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp642, float %tmp644, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0) %tmp661 = extractelement <4 x float> %tmp660, i32 0 %tmp662 = extractelement <4 x float> %tmp660, i32 1 - %tmp663 = bitcast float %tmp646 to i32 - %tmp664 = bitcast float %tmp648 to i32 - %tmp665 = bitcast float 0.000000e+00 to i32 - %tmp666 = insertelement <4 x i32> undef, i32 %tmp663, i32 0 - %tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1 - %tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2 - %tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3 - %tmp669.bc = bitcast <4 x i32> %tmp669 to <4 x float> - %tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp669.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp646, float %tmp648, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0) %tmp671 = extractelement <4 x float> %tmp670, i32 0 %tmp672 = extractelement <4 x float> %tmp670, i32 1 %tmp673 = fsub float -0.000000e+00, %tmp662 @@ -1865,10 +1679,10 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 +declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 +declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 +declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll +++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -355,7 +355,7 @@ ; CHECK: [[END]]: ; CHECK: s_endpgm -define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 { +define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 { bb: %tmp = fcmp ult float %arg1, 0.000000e+00 br i1 %tmp, label %bb3, label %bb4 @@ -365,7 +365,7 @@ br label %bb4 bb4: ; preds = %bb3, %bb - %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp6 = extractelement <4 x float> %tmp5, i32 0 %tmp7 = fcmp une float %tmp6, 0.000000e+00 br i1 %tmp7, label %bb8, label %bb9 @@ -378,7 +378,7 @@ ret void } -declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare void @llvm.AMDGPU.kill(float) #0 attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll +++ llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll @@ -21,7 +21,7 @@ %tmp6 = sext i32 %tmp5 to i64 %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6 %tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0 - %tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> , <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp10 = extractelement <4 x float> %tmp9, i32 0 %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef) call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0 @@ -30,7 +30,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll +++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll @@ -65,7 +65,7 @@ br label %bb4 bb9: ; preds = %bb2 - %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp11 = extractelement <4 x float> %tmp10, i32 1 %tmp12 = extractelement <4 x float> %tmp10, i32 3 br label %bb14 @@ -97,7 +97,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } Index: llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -34,9 +34,8 @@ %tmp = load volatile i32, i32 addrspace(1)* undef, align 4 %tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4 %tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0 - %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1 - %tmp3.cast = bitcast <4 x i32> %tmp3 to <4 x float> - %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp3.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp3 = bitcast i32 %tmp1 to float + %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp5 = extractelement <4 x float> %tmp4, i32 0 %tmp6 = fmul float %tmp5, undef %tmp7 = fadd float %tmp6, %tmp6 @@ -84,7 +83,7 @@ ret void } -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } Index: llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll +++ llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll @@ -17,25 +17,20 @@ %j.f.i = bitcast i32 %j.i to float %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2 - %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp24 = extractelement <4 x float> %tmp23, i32 3 %tmp25 = fmul float %tmp24, %tmp24 %tmp26 = fmul float %p2.i, %p2.i %tmp27 = fadd float %tmp26, %tmp26 - %tmp28 = bitcast float %tmp27 to i32 - %tmp29 = insertelement <4 x i32> undef, i32 %tmp28, i32 0 - %tmp30 = insertelement <4 x i32> %tmp29, i32 0, i32 1 - %tmp31 = insertelement <4 x i32> %tmp30, i32 undef, i32 2 - %tmp31.cast = bitcast <4 x i32> %tmp31 to <4 x float> - %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp31.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp33 = extractelement <4 x float> %tmp32, i32 0 %tmp34 = fadd float %tmp33, %tmp33 %tmp35 = fadd float %tmp34, %tmp34 %tmp36 = fadd float %tmp35, %tmp35 %tmp37 = fadd float %tmp36, %tmp36 %tmp38 = fadd float %tmp37, %tmp37 - %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp40 = extractelement <4 x float> %tmp39, i32 0 %tmp41 = extractelement <4 x float> %tmp39, i32 1 %tmp42 = extractelement <4 x float> %tmp39, i32 2 @@ -53,17 +48,12 @@ %tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1 %tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2 %tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float> - %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp55.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp57 = extractelement <4 x float> %tmp56, i32 0 %tmp58 = fadd float %tmp38, %tmp57 %tmp59 = fadd float %tmp46, %tmp46 %tmp60 = fadd float %tmp47, %tmp47 - %tmp61 = bitcast float %tmp59 to i32 - %tmp62 = bitcast float %tmp60 to i32 - %tmp63 = insertelement <4 x i32> undef, i32 %tmp61, i32 1 - %tmp64 = insertelement <4 x i32> %tmp63, i32 %tmp62, i32 2 - %tmp64.cast = bitcast <4 x i32> %tmp64 to <4 x float> - %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp64.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp66 = extractelement <4 x float> %tmp65, i32 0 %tmp67 = fadd float %tmp58, %tmp66 %tmp68 = fmul float %tmp67, 1.250000e-01 @@ -101,10 +91,7 @@ br label %ENDIF25 ENDIF28: ; preds = %LOOP - %tmp85 = insertelement <4 x i32> %tmp72, i32 undef, i32 1 - %tmp86 = insertelement <4 x i32> %tmp85, i32 undef, i32 2 - %tmp86.cast = bitcast <4 x i32> %tmp86 to <4 x float> - %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp86.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) %tmp88 = extractelement <4 x float> %tmp87, i32 0 %tmp89 = fadd float %tmp88, %tmp88 br label %LOOP @@ -114,9 +101,8 @@ declare float @llvm.maxnum.f32(float, float) #1 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 +declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2 attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" } attributes #1 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll +++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll @@ -5,10 +5,10 @@ ; ;CHECK-LABEL: {{^}}test1: ;CHECK-NOT: s_wqm -define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) { +define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, i32 %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) - call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %tex, i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex } @@ -30,11 +30,9 @@ %inst24 = extractelement <2 x float> %pos, i32 1 %inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0) %inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0) - %inst27 = insertelement <2 x float> undef, float %inst26, i32 0 %inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0) %inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0) - %inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1 - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 ret <4 x float> %tex } @@ -49,9 +47,9 @@ ;CHECK: store ;CHECK-NOT: exec ;CHECK: .size test3 -define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x float> %c) { +define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %tex.1 = bitcast <4 x float> %tex to <4 x i32> %tex.2 = extractelement <4 x i32> %tex.1, i32 0 @@ -77,11 +75,9 @@ %inst24 = extractelement <2 x float> %pos, i32 1 %inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0) %inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0) - %inst27 = insertelement <2 x float> undef, float %inst26, i32 0 %inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0) %inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0) - %inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1 - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %tex.0 = extractelement <4 x float> %tex, i32 0 %tex.1 = extractelement <4 x float> %tex, i32 1 %tex.2 = extractelement <4 x float> %tex, i32 2 @@ -108,8 +104,9 @@ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0) %c.1.bc = bitcast i32 %c.1 to float - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 ret <4 x float> %dtex } @@ -361,8 +358,9 @@ IF: %c.bc = bitcast i32 %c to float - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %data.if = extractelement <4 x float> %dtex, i32 0 br label %END @@ -403,8 +401,9 @@ IF: %c.bc = bitcast i32 %c to float - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %data.if = extractelement <4 x float> %dtex, i32 0 br label %END @@ -460,7 +459,7 @@ END: %coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ] %coord.END.bc = bitcast i32 %coord.END to float - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 ret <4 x float> %tex } @@ -477,8 +476,9 @@ ;CHECK-DAG: store define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %coord) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %dtex.1 = extractelement <4 x float> %dtex, i32 0 call void @llvm.amdgcn.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0) @@ -523,8 +523,9 @@ br label %END END: - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 ret <4 x float> %dtex } @@ -545,7 +546,7 @@ ;CHECK: image_sample define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %idx.0 = extractelement <2 x i32> %idx, i32 0 %data.0 = extractelement <2 x float> %data, i32 0 call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0) @@ -555,8 +556,9 @@ %idx.1 = extractelement <2 x i32> %idx, i32 1 %data.1 = extractelement <2 x float> %data, i32 1 call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0) - %tex2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex2.0 = extractelement <4 x float> %tex2, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %out = fadd <4 x float> %tex, %dtex ret <4 x float> %out @@ -576,8 +578,9 @@ ; CHECK: v_cmpx_ define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) @@ -618,7 +621,7 @@ ; CHECK: ; return define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { entry: - call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0) br label %loop loop: @@ -628,7 +631,8 @@ br i1 %cc, label %break, label %body body: - %c.next = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c.iv, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %c.iv0 = extractelement <4 x float> %c.iv, i32 0 + %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 %ctr.next = fadd float %ctr.iv, 2.0 br label %loop @@ -669,7 +673,7 @@ %c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx %c = load i32, i32 addrspace(5)* %c.gep, align 4 %c.bc = bitcast i32 %c to float - %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) ret void @@ -687,8 +691,9 @@ ; CHECK: s_and_b64 exec, exec, [[LIVE]] ; CHECK-NOT: exec define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind { - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 ret <4 x float> %dtex } @@ -700,8 +705,9 @@ ; CHECK-NOT: exec define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind { entry: - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 %cc = icmp sgt i32 %c, 0 br i1 %cc, label %if, label %else @@ -733,11 +739,11 @@ br i1 %cc, label %if, label %else if: - %r.if = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float 0.0, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 br label %end else: - %r.else = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> , <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0 br label %end end: @@ -757,8 +763,9 @@ define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) { main_body: %c.bc = bitcast i32 %c to float - %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 - %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 + %tex0 = extractelement <4 x float> %tex, i32 0 + %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 %cmp = icmp eq i32 %z, 0 br i1 %cmp, label %IF, label %ENDIF @@ -777,14 +784,13 @@ } declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 -declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2 declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3 +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3 declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3 declare void @llvm.AMDGPU.kill(float) #1 declare float @llvm.amdgcn.wqm.f32(float) #3 declare i32 @llvm.amdgcn.wqm.i32(i32) #3