Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -219,6 +219,101 @@
 def int_amdgcn_image_store : AMDGPUImageStore;
 def int_amdgcn_image_store_mip : AMDGPUImageStore;
 
+class AMDGPUImageSample : Intrinsic <
+  [llvm_anyfloat_ty], // vdata(VGPR): result, overloaded on the float type
+  [llvm_anyfloat_ty, // vaddr(VGPR): overloaded on the float type
+  llvm_anyint_ty, // rsrc(SGPR): overloaded on the integer type
+  llvm_v4i32_ty, // sampler(SGPR)
+  llvm_i32_ty, // dmask(imm)
+  llvm_i1_ty, // unorm(imm)
+  llvm_i1_ty, // glc(imm)
+  llvm_i1_ty, // slc(imm)
+  llvm_i1_ty, // lwe(imm)
+  llvm_i1_ty], // da(imm)
+  [IntrReadMem]>;
+
+// Basic sample intrinsics
+def int_amdgcn_image_sample : AMDGPUImageSample;
+def int_amdgcn_image_sample_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_d : AMDGPUImageSample;
+def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_l : AMDGPUImageSample;
+def int_amdgcn_image_sample_b : AMDGPUImageSample;
+def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_lz : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;
+
+// Sample with comparison (_c variants)
+def int_amdgcn_image_sample_c : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;
+
+// Sample with offsets (_o variants)
+def int_amdgcn_image_sample_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;
+
+// Sample with comparison and offsets (_c_*_o variants)
+def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;
+
+// Basic gather4 intrinsics
+def int_amdgcn_image_gather4 : AMDGPUImageSample;
+def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_l : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_lz : AMDGPUImageSample;
+
+// Gather4 with comparison (_c variants)
+def int_amdgcn_image_gather4_c : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;
+
+// Gather4 with offsets (_o variants)
+def int_amdgcn_image_gather4_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;
+
+// Gather4 with comparison and offsets (_c_*_o variants)
+def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;
+
+def int_amdgcn_image_getlod : AMDGPUImageSample;
+
 class AMDGPUImageAtomic : Intrinsic <
   [llvm_i32_ty],
   [llvm_i32_ty, // vdata(VGPR)
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -2536,6 +2536,31 @@
 def : SampleRawPattern(opcode # _V4_V16), v16i32>;
 }
 
+
+// Image + sampler patterns for the amdgcn intrinsics
+// TODO:
+// 1. Handle half-precision data types such as v4f16, and add D16 bit support.
+// 2. Handle a v4i32 rsrc type (the instruction's register class would then be SReg_128).
+// 3. Add A16 support for the case where the address is passed as a half type.
+multiclass AMDGCNSamplePattern { // Selects intrinsic `name` with address type `vt` to instruction `opcode`
+  def : Pat< // Only a v4f32 result with a v8i32 rsrc is matched (see the TODO list above)
+    (v4f32 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
+        i1:$slc, i1:$lwe, i1:$da)),
+    (opcode $addr, $rsrc, $sampler,
+          (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+          0, 0, (as_i1imm $lwe), (as_i1imm $da)) // NOTE(review): the literal 0, 0 are presumably r128 and tfe - confirm against the MIMG operand order
+  >;
+}
+
+multiclass AMDGCNSamplePatterns { // One pattern per address type: f32, v2f32, v4f32, v8f32, v16f32
+  defm : AMDGCNSamplePattern(opcode # _V4_V1), f32>;
+  defm : AMDGCNSamplePattern(opcode # _V4_V2), v2f32>;
+  defm : AMDGCNSamplePattern(opcode # _V4_V4), v4f32>;
+  defm : AMDGCNSamplePattern(opcode # _V4_V8), v8f32>;
+  defm : AMDGCNSamplePattern(opcode # _V4_V16), v16f32>;
+}
+
+
 // Image only
 class ImagePattern : Pat <
   (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm,
@@ -2599,6 +2624,13 @@
   sub0)
 >;
 
+// ======= SI Image Intrinsics (legacy patterns) ================
+
+// Image load
+defm : ImagePatterns;
+defm : ImagePatterns;
+def : ImagePattern;
+
 // Basic sample
 defm : SampleRawPatterns;
 defm : SampleRawPatterns;
@@ -2692,13 +2724,111 @@
 def : SampleRawPattern;
 def : SampleRawPattern;
 
-def : ImagePattern;
-defm : ImagePatterns;
-defm : ImagePatterns;
+
+// ======= amdgcn Image Intrinsics ==============
+
+// Image load
 defm : ImageLoadPatterns;
 defm : ImageLoadPatterns;
+
+// Image store
 defm : ImageStorePatterns;
 defm : ImageStorePatterns;
+
+// Basic sample
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+
+// Sample with comparison
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+
+// Sample with offsets
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+
+// Sample with comparison and offsets
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+defm : AMDGCNSamplePatterns;
+
+// Gather opcodes
+// Only the variants that make sense are defined.
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+defm : AMDGCNSamplePattern;
+
+// Image atomics
 defm : ImageAtomicPatterns;
 def : ImageAtomicCmpSwapPattern;
 def : ImageAtomicCmpSwapPattern;