Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -229,29 +229,30 @@ >; class AMDGPUImageLoad : Intrinsic < - [llvm_v4f32_ty], // vdata(VGPR) + [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyint_ty, // vaddr(VGPR) - llvm_v8i32_ty, // rsrc(SGPR) + llvm_anyint_ty, // rsrc(SGPR) llvm_i32_ty, // dmask(imm) - llvm_i1_ty, // r128(imm) - llvm_i1_ty, // da(imm) llvm_i1_ty, // glc(imm) - llvm_i1_ty], // slc(imm) + llvm_i1_ty, // slc(imm) + llvm_i1_ty, // lwe(imm) + llvm_i1_ty], // da(imm) [IntrReadMem]>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; +def int_amdgcn_image_getresinfo : AMDGPUImageLoad; class AMDGPUImageStore : Intrinsic < [], - [llvm_v4f32_ty, // vdata(VGPR) + [llvm_anyfloat_ty, // vdata(VGPR) llvm_anyint_ty, // vaddr(VGPR) - llvm_v8i32_ty, // rsrc(SGPR) + llvm_anyint_ty, // rsrc(SGPR) llvm_i32_ty, // dmask(imm) - llvm_i1_ty, // r128(imm) - llvm_i1_ty, // da(imm) llvm_i1_ty, // glc(imm) - llvm_i1_ty], // slc(imm) + llvm_i1_ty, // slc(imm) + llvm_i1_ty, // lwe(imm) + llvm_i1_ty], // da(imm) []>; def int_amdgcn_image_store : AMDGPUImageStore; Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -2446,32 +2446,36 @@ def : ImagePattern(opcode # _V4_V4), v4i32>; } -class ImageLoadPattern : Pat < - (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$r128, imm:$da, imm:$glc, - imm:$slc), - (opcode $addr, $rsrc, +multiclass ImageLoadPattern { + def : Pat < + (v4f32 (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, + i1:$da)), + (opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc), - (as_i1imm $r128), 0, 0, (as_i1imm $da)) ->; + 0, 0, (as_i1imm $lwe), (as_i1imm $da)) + >; +} multiclass ImageLoadPatterns { - def : ImageLoadPattern(opcode # _V4_V1), i32>; - def : ImageLoadPattern(opcode # _V4_V2), v2i32>; - def : ImageLoadPattern(opcode # _V4_V4), v4i32>; + defm : ImageLoadPattern(opcode # _V4_V1), i32>; + defm : ImageLoadPattern(opcode # _V4_V2), v2i32>; + defm : ImageLoadPattern(opcode # _V4_V4), v4i32>; } -class ImageStorePattern : Pat < - (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, imm:$r128, imm:$da, - imm:$glc, imm:$slc), - (opcode $data, $addr, $rsrc, +multiclass ImageStorePattern { + def : Pat < + (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, + i1:$lwe, i1:$da), + (opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc), - (as_i1imm $r128), 0, 0, (as_i1imm $da)) ->; + 0, 0, (as_i1imm $lwe), (as_i1imm $da)) + >; +} multiclass ImageStorePatterns { - def : ImageStorePattern(opcode # _V4_V1), i32>; - def : ImageStorePattern(opcode # _V4_V2), v2i32>; - def : ImageStorePattern(opcode # _V4_V4), v4i32>; + defm : ImageStorePattern(opcode # _V4_V1), i32>; + defm : ImageStorePattern(opcode # _V4_V2), v2i32>; + defm : ImageStorePattern(opcode # _V4_V4), v4i32>; } class ImageAtomicPattern : Pat < @@ -2592,6 +2596,7 @@ defm : ImagePatterns; defm : ImageLoadPatterns; defm : ImageLoadPatterns; +defm : ImageLoadPattern; defm : ImageStorePatterns; defm : ImageStorePatterns; defm : ImageAtomicPatterns; Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -6,7 +6,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret <4 x float> %tex } @@ -15,7 +15,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret <4 x float> %tex } @@ -24,7 +24,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret <4 x float> %tex } @@ -33,7 +33,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret <4 x float> %tex } @@ -42,7 +42,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) %elt = extractelement <4 x float> %tex, i32 0 ; Only first component used, test that dmask etc. is changed accordingly ret float %elt @@ -52,7 +52,7 @@ ;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) { main_body: - call void @llvm.amdgcn.image.store.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret void } @@ -60,7 +60,7 @@ ;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) { main_body: - call void @llvm.amdgcn.image.store.v2i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret void } @@ -68,7 +68,7 @@ ;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) { main_body: - call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret void } @@ -76,7 +76,7 @@ ;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) { main_body: - call void @llvm.amdgcn.image.store.mip.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) ret void } @@ -90,21 +90,21 @@ ;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) { main_body: - call void @llvm.amdgcn.image.store.i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0) - %data = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0) - call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0) ret void } -declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +declare void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly }