Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td @@ -390,175 +390,6 @@ def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">; def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">; -defset list AMDGPUImageIntrinsics = { - -class AMDGPUImageLoad : Intrinsic < - [llvm_anyfloat_ty], // vdata(VGPR) - [llvm_anyint_ty, // vaddr(VGPR) - llvm_anyint_ty, // rsrc(SGPR) - llvm_i32_ty, // dmask(imm) - llvm_i1_ty, // glc(imm) - llvm_i1_ty, // slc(imm) - llvm_i1_ty, // lwe(imm) - llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem]), "", - !if(NoMem, [], [SDNPMemOperand])>, - AMDGPURsrcIntrinsic<1, 1>; - -def int_amdgcn_image_load : AMDGPUImageLoad; -def int_amdgcn_image_load_mip : AMDGPUImageLoad; -def int_amdgcn_image_getresinfo : AMDGPUImageLoad<1>; - -class AMDGPUImageStore : Intrinsic < - [], - [llvm_anyfloat_ty, // vdata(VGPR) - llvm_anyint_ty, // vaddr(VGPR) - llvm_anyint_ty, // rsrc(SGPR) - llvm_i32_ty, // dmask(imm) - llvm_i1_ty, // glc(imm) - llvm_i1_ty, // slc(imm) - llvm_i1_ty, // lwe(imm) - llvm_i1_ty], // da(imm) - [IntrWriteMem], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<2, 1>; - -def int_amdgcn_image_store : AMDGPUImageStore; -def int_amdgcn_image_store_mip : AMDGPUImageStore; - -class AMDGPUImageSample : Intrinsic < - [llvm_anyfloat_ty], // vdata(VGPR) - [llvm_anyfloat_ty, // vaddr(VGPR) - llvm_anyint_ty, // rsrc(SGPR) - llvm_v4i32_ty, // sampler(SGPR) - llvm_i32_ty, // dmask(imm) - llvm_i1_ty, // unorm(imm) - llvm_i1_ty, // glc(imm) - llvm_i1_ty, // slc(imm) - llvm_i1_ty, // lwe(imm) - llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem]), "", - !if(NoMem, [], [SDNPMemOperand])>, - AMDGPURsrcIntrinsic<1, 1>; - -// Basic sample -def int_amdgcn_image_sample : AMDGPUImageSample; -def int_amdgcn_image_sample_cl : AMDGPUImageSample; -def int_amdgcn_image_sample_d : AMDGPUImageSample; -def int_amdgcn_image_sample_d_cl : AMDGPUImageSample; -def int_amdgcn_image_sample_l : AMDGPUImageSample; -def int_amdgcn_image_sample_b : AMDGPUImageSample; -def int_amdgcn_image_sample_b_cl : AMDGPUImageSample; -def int_amdgcn_image_sample_lz : AMDGPUImageSample; -def int_amdgcn_image_sample_cd : AMDGPUImageSample; -def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample; - -// Sample with comparison -def int_amdgcn_image_sample_c : AMDGPUImageSample; -def int_amdgcn_image_sample_c_cl : AMDGPUImageSample; -def int_amdgcn_image_sample_c_d : AMDGPUImageSample; -def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample; -def int_amdgcn_image_sample_c_l : AMDGPUImageSample; -def int_amdgcn_image_sample_c_b : AMDGPUImageSample; -def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample; -def int_amdgcn_image_sample_c_lz : AMDGPUImageSample; -def int_amdgcn_image_sample_c_cd : AMDGPUImageSample; -def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample; - -// Sample with offsets -def int_amdgcn_image_sample_o : AMDGPUImageSample; -def int_amdgcn_image_sample_cl_o : AMDGPUImageSample; -def int_amdgcn_image_sample_d_o : AMDGPUImageSample; -def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample; -def int_amdgcn_image_sample_l_o : AMDGPUImageSample; -def int_amdgcn_image_sample_b_o : AMDGPUImageSample; -def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample; -def int_amdgcn_image_sample_lz_o : AMDGPUImageSample; -def int_amdgcn_image_sample_cd_o : AMDGPUImageSample; -def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample; - -// Sample with comparison and offsets -def int_amdgcn_image_sample_c_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample; -def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample; - -// Basic gather4 -def int_amdgcn_image_gather4 : AMDGPUImageSample; -def int_amdgcn_image_gather4_cl : AMDGPUImageSample; -def int_amdgcn_image_gather4_l : AMDGPUImageSample; -def int_amdgcn_image_gather4_b : AMDGPUImageSample; -def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample; -def int_amdgcn_image_gather4_lz : AMDGPUImageSample; - -// Gather4 with comparison -def int_amdgcn_image_gather4_c : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_l : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_b : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample; - -// Gather4 with offsets -def int_amdgcn_image_gather4_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_l_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_b_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample; - -// Gather4 with comparison and offsets -def int_amdgcn_image_gather4_c_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample; -def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample; - -def int_amdgcn_image_getlod : AMDGPUImageSample<1>; - -class AMDGPUImageAtomic : Intrinsic < - [llvm_i32_ty], - [llvm_i32_ty, // vdata(VGPR) - llvm_anyint_ty, // vaddr(VGPR) - llvm_v8i32_ty, // rsrc(SGPR) - llvm_i1_ty, // r128(imm) - llvm_i1_ty, // da(imm) - llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<2, 1>; - -def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_and : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_or : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic; -def int_amdgcn_image_atomic_cmpswap : Intrinsic < - [llvm_i32_ty], - [llvm_i32_ty, // src(VGPR) - llvm_i32_ty, // cmp(VGPR) - llvm_anyint_ty, // vaddr(VGPR) - llvm_v8i32_ty, // rsrc(SGPR) - llvm_i1_ty, // r128(imm) - llvm_i1_ty, // da(imm) - llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<3, 1>; - -} // defset AMDGPUImageIntrinsics - } // TargetPrefix = "amdgcn" // New-style image intrinsics Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -486,90 +486,6 @@ BUFFER_ATOMIC_OR, BUFFER_ATOMIC_XOR, BUFFER_ATOMIC_CMPSWAP, - IMAGE_LOAD, - IMAGE_LOAD_MIP, - IMAGE_STORE, - IMAGE_STORE_MIP, - - // Basic sample. - IMAGE_SAMPLE, - IMAGE_SAMPLE_CL, - IMAGE_SAMPLE_D, - IMAGE_SAMPLE_D_CL, - IMAGE_SAMPLE_L, - IMAGE_SAMPLE_B, - IMAGE_SAMPLE_B_CL, - IMAGE_SAMPLE_LZ, - IMAGE_SAMPLE_CD, - IMAGE_SAMPLE_CD_CL, - - // Sample with comparison. - IMAGE_SAMPLE_C, - IMAGE_SAMPLE_C_CL, - IMAGE_SAMPLE_C_D, - IMAGE_SAMPLE_C_D_CL, - IMAGE_SAMPLE_C_L, - IMAGE_SAMPLE_C_B, - IMAGE_SAMPLE_C_B_CL, - IMAGE_SAMPLE_C_LZ, - IMAGE_SAMPLE_C_CD, - IMAGE_SAMPLE_C_CD_CL, - - // Sample with offsets. - IMAGE_SAMPLE_O, - IMAGE_SAMPLE_CL_O, - IMAGE_SAMPLE_D_O, - IMAGE_SAMPLE_D_CL_O, - IMAGE_SAMPLE_L_O, - IMAGE_SAMPLE_B_O, - IMAGE_SAMPLE_B_CL_O, - IMAGE_SAMPLE_LZ_O, - IMAGE_SAMPLE_CD_O, - IMAGE_SAMPLE_CD_CL_O, - - // Sample with comparison and offsets. - IMAGE_SAMPLE_C_O, - IMAGE_SAMPLE_C_CL_O, - IMAGE_SAMPLE_C_D_O, - IMAGE_SAMPLE_C_D_CL_O, - IMAGE_SAMPLE_C_L_O, - IMAGE_SAMPLE_C_B_O, - IMAGE_SAMPLE_C_B_CL_O, - IMAGE_SAMPLE_C_LZ_O, - IMAGE_SAMPLE_C_CD_O, - IMAGE_SAMPLE_C_CD_CL_O, - - // Basic gather4. - IMAGE_GATHER4, - IMAGE_GATHER4_CL, - IMAGE_GATHER4_L, - IMAGE_GATHER4_B, - IMAGE_GATHER4_B_CL, - IMAGE_GATHER4_LZ, - - // Gather4 with comparison. - IMAGE_GATHER4_C, - IMAGE_GATHER4_C_CL, - IMAGE_GATHER4_C_L, - IMAGE_GATHER4_C_B, - IMAGE_GATHER4_C_B_CL, - IMAGE_GATHER4_C_LZ, - - // Gather4 with offsets. - IMAGE_GATHER4_O, - IMAGE_GATHER4_CL_O, - IMAGE_GATHER4_L_O, - IMAGE_GATHER4_B_O, - IMAGE_GATHER4_B_CL_O, - IMAGE_GATHER4_LZ_O, - - // Gather4 with comparison and offsets. - IMAGE_GATHER4_C_O, - IMAGE_GATHER4_C_CL_O, - IMAGE_GATHER4_C_L_O, - IMAGE_GATHER4_C_B_O, - IMAGE_GATHER4_C_B_CL_O, - IMAGE_GATHER4_C_LZ_O, LAST_AMDGPU_ISD_NUMBER }; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4130,82 +4130,6 @@ NODE_NAME_CASE(BUFFER_ATOMIC_OR) NODE_NAME_CASE(BUFFER_ATOMIC_XOR) NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) - NODE_NAME_CASE(IMAGE_LOAD) - NODE_NAME_CASE(IMAGE_LOAD_MIP) - NODE_NAME_CASE(IMAGE_STORE) - NODE_NAME_CASE(IMAGE_STORE_MIP) - // Basic sample. - NODE_NAME_CASE(IMAGE_SAMPLE) - NODE_NAME_CASE(IMAGE_SAMPLE_CL) - NODE_NAME_CASE(IMAGE_SAMPLE_D) - NODE_NAME_CASE(IMAGE_SAMPLE_D_CL) - NODE_NAME_CASE(IMAGE_SAMPLE_L) - NODE_NAME_CASE(IMAGE_SAMPLE_B) - NODE_NAME_CASE(IMAGE_SAMPLE_B_CL) - NODE_NAME_CASE(IMAGE_SAMPLE_LZ) - NODE_NAME_CASE(IMAGE_SAMPLE_CD) - NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL) - // Sample with comparison. - NODE_NAME_CASE(IMAGE_SAMPLE_C) - NODE_NAME_CASE(IMAGE_SAMPLE_C_CL) - NODE_NAME_CASE(IMAGE_SAMPLE_C_D) - NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL) - NODE_NAME_CASE(IMAGE_SAMPLE_C_L) - NODE_NAME_CASE(IMAGE_SAMPLE_C_B) - NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL) - NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ) - NODE_NAME_CASE(IMAGE_SAMPLE_C_CD) - NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL) - // Sample with offsets. - NODE_NAME_CASE(IMAGE_SAMPLE_O) - NODE_NAME_CASE(IMAGE_SAMPLE_CL_O) - NODE_NAME_CASE(IMAGE_SAMPLE_D_O) - NODE_NAME_CASE(IMAGE_SAMPLE_D_CL_O) - NODE_NAME_CASE(IMAGE_SAMPLE_L_O) - NODE_NAME_CASE(IMAGE_SAMPLE_B_O) - NODE_NAME_CASE(IMAGE_SAMPLE_B_CL_O) - NODE_NAME_CASE(IMAGE_SAMPLE_LZ_O) - NODE_NAME_CASE(IMAGE_SAMPLE_CD_O) - NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL_O) - // Sample with comparison and offsets. - NODE_NAME_CASE(IMAGE_SAMPLE_C_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_CL_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_D_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_L_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_B_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_O) - NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL_O) - // Basic gather4. - NODE_NAME_CASE(IMAGE_GATHER4) - NODE_NAME_CASE(IMAGE_GATHER4_CL) - NODE_NAME_CASE(IMAGE_GATHER4_L) - NODE_NAME_CASE(IMAGE_GATHER4_B) - NODE_NAME_CASE(IMAGE_GATHER4_B_CL) - NODE_NAME_CASE(IMAGE_GATHER4_LZ) - // Gather4 with comparison. - NODE_NAME_CASE(IMAGE_GATHER4_C) - NODE_NAME_CASE(IMAGE_GATHER4_C_CL) - NODE_NAME_CASE(IMAGE_GATHER4_C_L) - NODE_NAME_CASE(IMAGE_GATHER4_C_B) - NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL) - NODE_NAME_CASE(IMAGE_GATHER4_C_LZ) - // Gather4 with offsets. - NODE_NAME_CASE(IMAGE_GATHER4_O) - NODE_NAME_CASE(IMAGE_GATHER4_CL_O) - NODE_NAME_CASE(IMAGE_GATHER4_L_O) - NODE_NAME_CASE(IMAGE_GATHER4_B_O) - NODE_NAME_CASE(IMAGE_GATHER4_B_CL_O) - NODE_NAME_CASE(IMAGE_GATHER4_LZ_O) - // Gather4 with comparison and offsets. - NODE_NAME_CASE(IMAGE_GATHER4_C_O) - NODE_NAME_CASE(IMAGE_GATHER4_C_CL_O) - NODE_NAME_CASE(IMAGE_GATHER4_C_L_O) - NODE_NAME_CASE(IMAGE_GATHER4_C_B_O) - NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL_O) - NODE_NAME_CASE(IMAGE_GATHER4_C_LZ_O) case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -26,7 +26,6 @@ } foreach intr = !listconcat(AMDGPUBufferIntrinsics, - AMDGPUImageIntrinsics, AMDGPUImageDimIntrinsics, AMDGPUImageDimAtomicIntrinsics) in { def : RsrcIntrinsic(intr)>; @@ -60,19 +59,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; Index: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td @@ -10,12 +10,10 @@ // MIMG-specific encoding families to distinguish between semantically // equivalent machine instructions with different encoding. // -// - MIMGEncPseudo: pseudo instruction, only used for atomics // - MIMGEncGfx6: encoding introduced with gfx6 (obsoleted for atomics in gfx8) // - MIMGEncGfx8: encoding introduced with gfx8 for atomics class MIMGEncoding; -def MIMGEncPseudo : MIMGEncoding; def MIMGEncGfx6 : MIMGEncoding; def MIMGEncGfx8 : MIMGEncoding; @@ -244,12 +242,7 @@ multiclass MIMG_Atomic_Helper_m { - let isPseudo = 1, isCodeGenOnly = 1, MIMGEncoding = MIMGEncPseudo in { - def "" : MIMG_Atomic_Helper, - SIMCInstr; - } - - let ssamp = 0, d16 = 0, isCodeGenOnly = 0 in { + let ssamp = 0, d16 = 0 in { def _si : MIMG_Atomic_Helper, SIMCInstr, MIMGe { @@ -507,385 +500,3 @@ AMDGPUImageDimAtomicIntrinsics) in { def : ImageDimIntrinsicInfo; } - -/********** ======================= **********/ -/********** Image sampling patterns **********/ -/********** ======================= **********/ - -// ImageSample for amdgcn -// TODO: -// 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). -// 2. Add A16 support when we pass address of half type. -multiclass ImageSamplePattern { - def : GCNPat< - (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc, - i1:$slc, i1:$lwe, i1:$da)), - !con((opcode $addr, $rsrc, $sampler, (as_i32imm $dmask), (as_i1imm $unorm), - (as_i1imm $glc), (as_i1imm $slc), 0, 0, (as_i1imm $lwe), - (as_i1imm $da)), - !if(opcode.BaseOpcode.HasD16, (opcode d16), (opcode))) - >; -} - -multiclass ImageSampleDataPatterns { - defm : ImageSamplePattern(opcode # _V1), dt, f32, d16>; - defm : ImageSamplePattern(opcode # _V2), dt, v2f32, d16>; - defm : ImageSamplePattern(opcode # _V4), dt, v4f32, d16>; - defm : ImageSamplePattern(opcode # _V8), dt, v8f32, d16>; - defm : ImageSamplePattern(opcode # _V16), dt, v16f32, d16>; -} - -// ImageSample patterns. -multiclass ImageSamplePatterns { - defm : ImageSampleDataPatterns(opcode # _V1), f32, 0>; - defm : ImageSampleDataPatterns(opcode # _V2), v2f32, 0>; - defm : ImageSampleDataPatterns(opcode # _V4), v4f32, 0>; - - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageSampleDataPatterns(opcode # _V1), f16, 1>; - } // End HasUnpackedD16VMem. - - let SubtargetPredicate = HasPackedD16VMem in { - defm : ImageSampleDataPatterns(opcode # _V1), f16, 1>; - defm : ImageSampleDataPatterns(opcode # _V1), v2f16, 1>; - defm : ImageSampleDataPatterns(opcode # _V2), v4f16, 1>; - } // End HasPackedD16VMem. -} - -// ImageSample alternative patterns for illegal vector half Types. -multiclass ImageSampleAltPatterns { - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageSampleDataPatterns(opcode # _V2), v2i32, 1>; - defm : ImageSampleDataPatterns(opcode # _V4), v4i32, 1>; - } // End HasUnpackedD16VMem. -} - -// ImageGather4 patterns. -multiclass ImageGather4Patterns { - defm : ImageSampleDataPatterns(opcode # _V4), v4f32, 0>; - - let SubtargetPredicate = HasPackedD16VMem in { - defm : ImageSampleDataPatterns(opcode # _V2), v4f16, 1>; - } // End HasPackedD16VMem. -} - -// ImageGather4 alternative patterns for illegal vector half Types. -multiclass ImageGather4AltPatterns { - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageSampleDataPatterns(opcode # _V4), v4i32, 1>; - } // End HasUnpackedD16VMem. -} - -// ImageLoad for amdgcn. -multiclass ImageLoadPattern { - def : GCNPat < - (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, - i1:$da)), - !con((opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc), - (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)), - !if(opcode.BaseOpcode.HasD16, (opcode d16), (opcode))) - >; -} - -multiclass ImageLoadDataPatterns { - defm : ImageLoadPattern(opcode # _V1), dt, i32, d16>; - defm : ImageLoadPattern(opcode # _V2), dt, v2i32, d16>; - defm : ImageLoadPattern(opcode # _V4), dt, v4i32, d16>; -} - -// ImageLoad patterns. -// TODO: support v3f32. -multiclass ImageLoadPatterns { - defm : ImageLoadDataPatterns(opcode # _V1), f32, 0>; - defm : ImageLoadDataPatterns(opcode # _V2), v2f32, 0>; - defm : ImageLoadDataPatterns(opcode # _V4), v4f32, 0>; - - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageLoadDataPatterns(opcode # _V1), f16, 1>; - } // End HasUnpackedD16VMem. - - let SubtargetPredicate = HasPackedD16VMem in { - defm : ImageLoadDataPatterns(opcode # _V1), f16, 1>; - defm : ImageLoadDataPatterns(opcode # _V1), v2f16, 1>; - defm : ImageLoadDataPatterns(opcode # _V2), v4f16, 1>; - } // End HasPackedD16VMem. -} - -// ImageLoad alternative patterns for illegal vector half Types. -multiclass ImageLoadAltPatterns { - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageLoadDataPatterns(opcode # _V2), v2i32, 1>; - defm : ImageLoadDataPatterns(opcode # _V4), v4i32, 1>; - } // End HasUnPackedD16VMem. -} - -// ImageStore for amdgcn. -multiclass ImageStorePattern { - def : GCNPat < - (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, - i1:$lwe, i1:$da), - !con((opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc), - (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)), - !if(opcode.BaseOpcode.HasD16, (opcode d16), (opcode))) - >; -} - -multiclass ImageStoreDataPatterns { - defm : ImageStorePattern(opcode # _V1), dt, i32, d16>; - defm : ImageStorePattern(opcode # _V2), dt, v2i32, d16>; - defm : ImageStorePattern(opcode # _V4), dt, v4i32, d16>; -} - -// ImageStore patterns. -// TODO: support v3f32. -multiclass ImageStorePatterns { - defm : ImageStoreDataPatterns(opcode # _V1), f32, 0>; - defm : ImageStoreDataPatterns(opcode # _V2), v2f32, 0>; - defm : ImageStoreDataPatterns(opcode # _V4), v4f32, 0>; - - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageStoreDataPatterns(opcode # _V1), f16, 1>; - } // End HasUnpackedD16VMem. - - let SubtargetPredicate = HasPackedD16VMem in { - defm : ImageStoreDataPatterns(opcode # _V1), f16, 1>; - defm : ImageStoreDataPatterns(opcode # _V1), v2f16, 1>; - defm : ImageStoreDataPatterns(opcode # _V2), v4f16, 1>; - } // End HasPackedD16VMem. -} - -// ImageStore alternative patterns. -multiclass ImageStoreAltPatterns { - let SubtargetPredicate = HasUnpackedD16VMem in { - defm : ImageStoreDataPatterns(opcode # _V2), v2i32, 1>; - defm : ImageStoreDataPatterns(opcode # _V4), v4i32, 1>; - } // End HasUnpackedD16VMem. - - let SubtargetPredicate = HasPackedD16VMem in { - defm : ImageStoreDataPatterns(opcode # _V1), i32, 1>; - defm : ImageStoreDataPatterns(opcode # _V2), v2i32, 1>; - } // End HasPackedD16VMem. -} - -// ImageAtomic for amdgcn. -class ImageAtomicPattern : GCNPat < - (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), - (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)) ->; - -// ImageAtomic patterns. -multiclass ImageAtomicPatterns { - def : ImageAtomicPattern(opcode # _V1_V1), i32>; - def : ImageAtomicPattern(opcode # _V1_V2), v2i32>; - def : ImageAtomicPattern(opcode # _V1_V4), v4i32>; -} - -// ImageAtomicCmpSwap for amdgcn. -class ImageAtomicCmpSwapPattern : GCNPat < - (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc, - imm:$r128, imm:$da, imm:$slc), - (EXTRACT_SUBREG - (opcode (REG_SEQUENCE VReg_64, $vsrc, sub0, $vcmp, sub1), - $addr, $rsrc, 3, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)), - sub0) ->; - -// ======= amdgcn Image Intrinsics ============== - -// Image load. -defm : ImageLoadPatterns; -defm : ImageLoadPatterns; -defm : ImageLoadPatterns; -defm : ImageLoadAltPatterns; -defm : ImageLoadAltPatterns; - -// Image store. -defm : ImageStorePatterns; -defm : ImageStorePatterns; -defm : ImageStoreAltPatterns; -defm : ImageStoreAltPatterns; - -// Basic sample. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; - -// Sample with comparison. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; - -// Sample with offsets. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; - -// Sample with comparison and offsets. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; - -// Basic gather4. -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; - -// Gather4 with comparison. -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; - -// Gather4 with offsets. -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; - -// Gather4 with comparison and offsets. -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; -defm : ImageGather4Patterns; - -// Basic sample alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; - -// Sample with comparison alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; - -// Sample with offsets alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; - -// Sample with comparison and offsets alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; - -// Basic gather4 alternative. -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; - -// Gather4 with comparison alternative. -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; - -// Gather4 with offsets alternative. -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; - -// Gather4 with comparison and offsets alternative. -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; -defm : ImageGather4AltPatterns; - -defm : ImageSamplePatterns; - -// Image atomics -defm : ImageAtomicPatterns; -def : ImageAtomicCmpSwapPattern; -def : ImageAtomicCmpSwapPattern; -def : ImageAtomicCmpSwapPattern; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; -defm : ImageAtomicPatterns; Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3519,163 +3519,6 @@ return SDValue(); } -static unsigned getImageOpcode(unsigned IID) { - switch (IID) { - case Intrinsic::amdgcn_image_load: - return AMDGPUISD::IMAGE_LOAD; - case Intrinsic::amdgcn_image_load_mip: - return AMDGPUISD::IMAGE_LOAD_MIP; - - // Basic sample. - case Intrinsic::amdgcn_image_sample: - return AMDGPUISD::IMAGE_SAMPLE; - case Intrinsic::amdgcn_image_sample_cl: - return AMDGPUISD::IMAGE_SAMPLE_CL; - case Intrinsic::amdgcn_image_sample_d: - return AMDGPUISD::IMAGE_SAMPLE_D; - case Intrinsic::amdgcn_image_sample_d_cl: - return AMDGPUISD::IMAGE_SAMPLE_D_CL; - case Intrinsic::amdgcn_image_sample_l: - return AMDGPUISD::IMAGE_SAMPLE_L; - case Intrinsic::amdgcn_image_sample_b: - return AMDGPUISD::IMAGE_SAMPLE_B; - case Intrinsic::amdgcn_image_sample_b_cl: - return AMDGPUISD::IMAGE_SAMPLE_B_CL; - case Intrinsic::amdgcn_image_sample_lz: - return AMDGPUISD::IMAGE_SAMPLE_LZ; - case Intrinsic::amdgcn_image_sample_cd: - return AMDGPUISD::IMAGE_SAMPLE_CD; - case Intrinsic::amdgcn_image_sample_cd_cl: - return AMDGPUISD::IMAGE_SAMPLE_CD_CL; - - // Sample with comparison. - case Intrinsic::amdgcn_image_sample_c: - return AMDGPUISD::IMAGE_SAMPLE_C; - case Intrinsic::amdgcn_image_sample_c_cl: - return AMDGPUISD::IMAGE_SAMPLE_C_CL; - case Intrinsic::amdgcn_image_sample_c_d: - return AMDGPUISD::IMAGE_SAMPLE_C_D; - case Intrinsic::amdgcn_image_sample_c_d_cl: - return AMDGPUISD::IMAGE_SAMPLE_C_D_CL; - case Intrinsic::amdgcn_image_sample_c_l: - return AMDGPUISD::IMAGE_SAMPLE_C_L; - case Intrinsic::amdgcn_image_sample_c_b: - return AMDGPUISD::IMAGE_SAMPLE_C_B; - case Intrinsic::amdgcn_image_sample_c_b_cl: - return AMDGPUISD::IMAGE_SAMPLE_C_B_CL; - case Intrinsic::amdgcn_image_sample_c_lz: - return AMDGPUISD::IMAGE_SAMPLE_C_LZ; - case Intrinsic::amdgcn_image_sample_c_cd: - return AMDGPUISD::IMAGE_SAMPLE_C_CD; - case Intrinsic::amdgcn_image_sample_c_cd_cl: - return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL; - - // Sample with offsets. - case Intrinsic::amdgcn_image_sample_o: - return AMDGPUISD::IMAGE_SAMPLE_O; - case Intrinsic::amdgcn_image_sample_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_CL_O; - case Intrinsic::amdgcn_image_sample_d_o: - return AMDGPUISD::IMAGE_SAMPLE_D_O; - case Intrinsic::amdgcn_image_sample_d_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_D_CL_O; - case Intrinsic::amdgcn_image_sample_l_o: - return AMDGPUISD::IMAGE_SAMPLE_L_O; - case Intrinsic::amdgcn_image_sample_b_o: - return AMDGPUISD::IMAGE_SAMPLE_B_O; - case Intrinsic::amdgcn_image_sample_b_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_B_CL_O; - case Intrinsic::amdgcn_image_sample_lz_o: - return AMDGPUISD::IMAGE_SAMPLE_LZ_O; - case Intrinsic::amdgcn_image_sample_cd_o: - return AMDGPUISD::IMAGE_SAMPLE_CD_O; - case Intrinsic::amdgcn_image_sample_cd_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_CD_CL_O; - - // Sample with comparison and offsets. - case Intrinsic::amdgcn_image_sample_c_o: - return AMDGPUISD::IMAGE_SAMPLE_C_O; - case Intrinsic::amdgcn_image_sample_c_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_C_CL_O; - case Intrinsic::amdgcn_image_sample_c_d_o: - return AMDGPUISD::IMAGE_SAMPLE_C_D_O; - case Intrinsic::amdgcn_image_sample_c_d_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O; - case Intrinsic::amdgcn_image_sample_c_l_o: - return AMDGPUISD::IMAGE_SAMPLE_C_L_O; - case Intrinsic::amdgcn_image_sample_c_b_o: - return AMDGPUISD::IMAGE_SAMPLE_C_B_O; - case Intrinsic::amdgcn_image_sample_c_b_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O; - case Intrinsic::amdgcn_image_sample_c_lz_o: - return AMDGPUISD::IMAGE_SAMPLE_C_LZ_O; - case Intrinsic::amdgcn_image_sample_c_cd_o: - return AMDGPUISD::IMAGE_SAMPLE_C_CD_O; - case Intrinsic::amdgcn_image_sample_c_cd_cl_o: - return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O; - - // Basic gather4. - case Intrinsic::amdgcn_image_gather4: - return AMDGPUISD::IMAGE_GATHER4; - case Intrinsic::amdgcn_image_gather4_cl: - return AMDGPUISD::IMAGE_GATHER4_CL; - case Intrinsic::amdgcn_image_gather4_l: - return AMDGPUISD::IMAGE_GATHER4_L; - case Intrinsic::amdgcn_image_gather4_b: - return AMDGPUISD::IMAGE_GATHER4_B; - case Intrinsic::amdgcn_image_gather4_b_cl: - return AMDGPUISD::IMAGE_GATHER4_B_CL; - case Intrinsic::amdgcn_image_gather4_lz: - return AMDGPUISD::IMAGE_GATHER4_LZ; - - // Gather4 with comparison. - case Intrinsic::amdgcn_image_gather4_c: - return AMDGPUISD::IMAGE_GATHER4_C; - case Intrinsic::amdgcn_image_gather4_c_cl: - return AMDGPUISD::IMAGE_GATHER4_C_CL; - case Intrinsic::amdgcn_image_gather4_c_l: - return AMDGPUISD::IMAGE_GATHER4_C_L; - case Intrinsic::amdgcn_image_gather4_c_b: - return AMDGPUISD::IMAGE_GATHER4_C_B; - case Intrinsic::amdgcn_image_gather4_c_b_cl: - return AMDGPUISD::IMAGE_GATHER4_C_B_CL; - case Intrinsic::amdgcn_image_gather4_c_lz: - return AMDGPUISD::IMAGE_GATHER4_C_LZ; - - // Gather4 with offsets. - case Intrinsic::amdgcn_image_gather4_o: - return AMDGPUISD::IMAGE_GATHER4_O; - case Intrinsic::amdgcn_image_gather4_cl_o: - return AMDGPUISD::IMAGE_GATHER4_CL_O; - case Intrinsic::amdgcn_image_gather4_l_o: - return AMDGPUISD::IMAGE_GATHER4_L_O; - case Intrinsic::amdgcn_image_gather4_b_o: - return AMDGPUISD::IMAGE_GATHER4_B_O; - case Intrinsic::amdgcn_image_gather4_b_cl_o: - return AMDGPUISD::IMAGE_GATHER4_B_CL_O; - case Intrinsic::amdgcn_image_gather4_lz_o: - return AMDGPUISD::IMAGE_GATHER4_LZ_O; - - // Gather4 with comparison and offsets. - case Intrinsic::amdgcn_image_gather4_c_o: - return AMDGPUISD::IMAGE_GATHER4_C_O; - case Intrinsic::amdgcn_image_gather4_c_cl_o: - return AMDGPUISD::IMAGE_GATHER4_C_CL_O; - case Intrinsic::amdgcn_image_gather4_c_l_o: - return AMDGPUISD::IMAGE_GATHER4_C_L_O; - case Intrinsic::amdgcn_image_gather4_c_b_o: - return AMDGPUISD::IMAGE_GATHER4_C_B_O; - case Intrinsic::amdgcn_image_gather4_c_b_cl_o: - return AMDGPUISD::IMAGE_GATHER4_C_B_CL_O; - case Intrinsic::amdgcn_image_gather4_c_lz_o: - return AMDGPUISD::IMAGE_GATHER4_C_LZ_O; - - default: - break; - } - return 0; -} - static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT, const SDLoc &DL, SelectionDAG &DAG, bool Unpacked) { @@ -5081,16 +4924,6 @@ return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src), 0); } - case Intrinsic::amdgcn_image_getlod: - case Intrinsic::amdgcn_image_getresinfo: { - unsigned Idx = (IntrinsicID == Intrinsic::amdgcn_image_getresinfo) ? 3 : 4; - - // Replace dmask with everything disabled with undef. - const ConstantSDNode *DMask = dyn_cast(Op.getOperand(Idx)); - if (!DMask || DMask->isNullValue()) - return DAG.getUNDEF(Op.getValueType()); - return SDValue(); - } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) @@ -5269,113 +5102,6 @@ Op->getVTList(), Ops, VT, M->getMemOperand()); } - case Intrinsic::amdgcn_image_load: - case Intrinsic::amdgcn_image_load_mip: { - EVT VT = Op.getValueType(); - if (Subtarget->hasUnpackedD16VMem() && - VT.isVector() && VT.getScalarSizeInBits() == 16) { - return adjustLoadValueType(getImageOpcode(IntrID), cast(Op), - DAG); - } - - return SDValue(); - } - - // Basic sample. - case Intrinsic::amdgcn_image_sample: - case Intrinsic::amdgcn_image_sample_cl: - case Intrinsic::amdgcn_image_sample_d: - case Intrinsic::amdgcn_image_sample_d_cl: - case Intrinsic::amdgcn_image_sample_l: - case Intrinsic::amdgcn_image_sample_b: - case Intrinsic::amdgcn_image_sample_b_cl: - case Intrinsic::amdgcn_image_sample_lz: - case Intrinsic::amdgcn_image_sample_cd: - case Intrinsic::amdgcn_image_sample_cd_cl: - - // Sample with comparison. - case Intrinsic::amdgcn_image_sample_c: - case Intrinsic::amdgcn_image_sample_c_cl: - case Intrinsic::amdgcn_image_sample_c_d: - case Intrinsic::amdgcn_image_sample_c_d_cl: - case Intrinsic::amdgcn_image_sample_c_l: - case Intrinsic::amdgcn_image_sample_c_b: - case Intrinsic::amdgcn_image_sample_c_b_cl: - case Intrinsic::amdgcn_image_sample_c_lz: - case Intrinsic::amdgcn_image_sample_c_cd: - case Intrinsic::amdgcn_image_sample_c_cd_cl: - - // Sample with offsets. - case Intrinsic::amdgcn_image_sample_o: - case Intrinsic::amdgcn_image_sample_cl_o: - case Intrinsic::amdgcn_image_sample_d_o: - case Intrinsic::amdgcn_image_sample_d_cl_o: - case Intrinsic::amdgcn_image_sample_l_o: - case Intrinsic::amdgcn_image_sample_b_o: - case Intrinsic::amdgcn_image_sample_b_cl_o: - case Intrinsic::amdgcn_image_sample_lz_o: - case Intrinsic::amdgcn_image_sample_cd_o: - case Intrinsic::amdgcn_image_sample_cd_cl_o: - - // Sample with comparison and offsets. - case Intrinsic::amdgcn_image_sample_c_o: - case Intrinsic::amdgcn_image_sample_c_cl_o: - case Intrinsic::amdgcn_image_sample_c_d_o: - case Intrinsic::amdgcn_image_sample_c_d_cl_o: - case Intrinsic::amdgcn_image_sample_c_l_o: - case Intrinsic::amdgcn_image_sample_c_b_o: - case Intrinsic::amdgcn_image_sample_c_b_cl_o: - case Intrinsic::amdgcn_image_sample_c_lz_o: - case Intrinsic::amdgcn_image_sample_c_cd_o: - case Intrinsic::amdgcn_image_sample_c_cd_cl_o: - - // Basic gather4 - case Intrinsic::amdgcn_image_gather4: - case Intrinsic::amdgcn_image_gather4_cl: - case Intrinsic::amdgcn_image_gather4_l: - case Intrinsic::amdgcn_image_gather4_b: - case Intrinsic::amdgcn_image_gather4_b_cl: - case Intrinsic::amdgcn_image_gather4_lz: - - // Gather4 with comparison - case Intrinsic::amdgcn_image_gather4_c: - case Intrinsic::amdgcn_image_gather4_c_cl: - case Intrinsic::amdgcn_image_gather4_c_l: - case Intrinsic::amdgcn_image_gather4_c_b: - case Intrinsic::amdgcn_image_gather4_c_b_cl: - case Intrinsic::amdgcn_image_gather4_c_lz: - - // Gather4 with offsets - case Intrinsic::amdgcn_image_gather4_o: - case Intrinsic::amdgcn_image_gather4_cl_o: - case Intrinsic::amdgcn_image_gather4_l_o: - case Intrinsic::amdgcn_image_gather4_b_o: - case Intrinsic::amdgcn_image_gather4_b_cl_o: - case Intrinsic::amdgcn_image_gather4_lz_o: - - // Gather4 with comparison and offsets - case Intrinsic::amdgcn_image_gather4_c_o: - case Intrinsic::amdgcn_image_gather4_c_cl_o: - case Intrinsic::amdgcn_image_gather4_c_l_o: - case Intrinsic::amdgcn_image_gather4_c_b_o: - case Intrinsic::amdgcn_image_gather4_c_b_cl_o: - case Intrinsic::amdgcn_image_gather4_c_lz_o: { - // Replace dmask with everything disabled with undef. - const ConstantSDNode *DMask = dyn_cast(Op.getOperand(5)); - if (!DMask || DMask->isNullValue()) { - SDValue Undef = DAG.getUNDEF(Op.getValueType()); - return DAG.getMergeValues({ Undef, Op.getOperand(0) }, SDLoc(Op)); - } - - if (Subtarget->hasUnpackedD16VMem() && - Op.getValueType().isVector() && - Op.getValueType().getScalarSizeInBits() == 16) { - return adjustLoadValueType(getImageOpcode(IntrID), cast(Op), - DAG); - } - - return SDValue(); - } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) @@ -5599,35 +5325,6 @@ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); } - case Intrinsic::amdgcn_image_store: - case Intrinsic::amdgcn_image_store_mip: { - SDValue VData = Op.getOperand(2); - EVT VT = VData.getValueType(); - if (Subtarget->hasUnpackedD16VMem() && - VT.isVector() && VT.getScalarSizeInBits() == 16) { - SDValue Chain = Op.getOperand(0); - - VData = handleD16VData(VData, DAG); - SDValue Ops[] = { - Chain, // Chain - VData, // vdata - Op.getOperand(3), // vaddr - Op.getOperand(4), // rsrc - Op.getOperand(5), // dmask - Op.getOperand(6), // glc - Op.getOperand(7), // slc - Op.getOperand(8), // lwe - Op.getOperand(9) // da - }; - unsigned Opc = (IntrinsicID == Intrinsic::amdgcn_image_store) ? - AMDGPUISD::IMAGE_STORE : AMDGPUISD::IMAGE_STORE_MIP; - MemSDNode *M = cast(Op); - return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, - M->getMemoryVT(), M->getMemOperand()); - } - - return SDValue(); - } default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -172,134 +172,6 @@ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] >; -def SDTImage_load : SDTypeProfile<1, 7, - [ - SDTCisInt<1>, // vaddr - SDTCisInt<2>, // rsrc - SDTCisVT<3, i32>, // dmask - SDTCisVT<4, i1>, // glc - SDTCisVT<5, i1>, // slc - SDTCisVT<6, i1>, // lwe - SDTCisVT<7, i1> // da - ]>; -def SIImage_load : SDNode<"AMDGPUISD::IMAGE_LOAD", SDTImage_load, - [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; -def SIImage_load_mip : SDNode<"AMDGPUISD::IMAGE_LOAD_MIP", SDTImage_load, - [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; - -def SDTImage_store : SDTypeProfile<0, 8, - [ - SDTCisInt<1>, // vaddr - SDTCisInt<2>, // rsrc - SDTCisVT<3, i32>, // dmask - SDTCisVT<4, i1>, // glc - SDTCisVT<5, i1>, // slc - SDTCisVT<6, i1>, // lwe - SDTCisVT<7, i1> // da - ]>; -def SIImage_store : SDNode <"AMDGPUISD::IMAGE_STORE", - SDTImage_store, - [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; -def SIImage_store_mip : SDNode <"AMDGPUISD::IMAGE_STORE_MIP", - SDTImage_store, - [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; - -class SDTImage_sample : SDNode , // vaddr - SDTCisInt<2>, // rsrc - SDTCisVT<3, v4i32>, // sampler - SDTCisVT<4, i32>, // dmask - SDTCisVT<5, i1>, // unorm - SDTCisVT<6, i1>, // glc - SDTCisVT<7, i1>, // slc - SDTCisVT<8, i1>, // lwe - SDTCisVT<9, i1> // da - ]>, - [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] ->; - -// Basic sample. -def SIImage_sample : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE">; -def SIImage_sample_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL">; -def SIImage_sample_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D">; -def SIImage_sample_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL">; -def SIImage_sample_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L">; -def SIImage_sample_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B">; -def SIImage_sample_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL">; -def SIImage_sample_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ">; -def SIImage_sample_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD">; -def SIImage_sample_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL">; - -// Sample with comparison. -def SIImage_sample_c : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C">; -def SIImage_sample_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL">; -def SIImage_sample_c_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D">; -def SIImage_sample_c_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL">; -def SIImage_sample_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L">; -def SIImage_sample_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B">; -def SIImage_sample_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL">; -def SIImage_sample_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ">; -def SIImage_sample_c_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD">; -def SIImage_sample_c_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL">; - -// Sample with offsets. -def SIImage_sample_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_O">; -def SIImage_sample_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL_O">; -def SIImage_sample_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_O">; -def SIImage_sample_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL_O">; -def SIImage_sample_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L_O">; -def SIImage_sample_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_O">; -def SIImage_sample_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL_O">; -def SIImage_sample_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ_O">; -def SIImage_sample_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_O">; -def SIImage_sample_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL_O">; - -// Sample with comparison and offsets. -def SIImage_sample_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_O">; -def SIImage_sample_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL_O">; -def SIImage_sample_c_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_O">; -def SIImage_sample_c_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O">; -def SIImage_sample_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L_O">; -def SIImage_sample_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_O">; -def SIImage_sample_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O">; -def SIImage_sample_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ_O">; -def SIImage_sample_c_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_O">; -def SIImage_sample_c_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O">; - -// Basic gather4. -def SIImage_gather4 : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4">; -def SIImage_gather4_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL">; -def SIImage_gather4_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L">; -def SIImage_gather4_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B">; -def SIImage_gather4_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL">; -def SIImage_gather4_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ">; - -// Gather4 with comparison. -def SIImage_gather4_c : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C">; -def SIImage_gather4_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL">; -def SIImage_gather4_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L">; -def SIImage_gather4_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B">; -def SIImage_gather4_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL">; -def SIImage_gather4_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ">; - -// Gather4 with offsets. -def SIImage_gather4_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_O">; -def SIImage_gather4_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL_O">; -def SIImage_gather4_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L_O">; -def SIImage_gather4_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_O">; -def SIImage_gather4_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL_O">; -def SIImage_gather4_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ_O">; - -// Gather4 with comparison and offsets. -def SIImage_gather4_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_O">; -def SIImage_gather4_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL_O">; -def SIImage_gather4_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L_O">; -def SIImage_gather4_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_O">; -def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">; -def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">; - def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]> >; Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1650,57 +1650,7 @@ break; case Intrinsic::amdgcn_buffer_load: case Intrinsic::amdgcn_buffer_load_format: - case Intrinsic::amdgcn_image_sample: - case Intrinsic::amdgcn_image_sample_cl: - case Intrinsic::amdgcn_image_sample_d: - case Intrinsic::amdgcn_image_sample_d_cl: - case Intrinsic::amdgcn_image_sample_l: - case Intrinsic::amdgcn_image_sample_b: - case Intrinsic::amdgcn_image_sample_b_cl: - case Intrinsic::amdgcn_image_sample_lz: - case Intrinsic::amdgcn_image_sample_cd: - case Intrinsic::amdgcn_image_sample_cd_cl: - - case Intrinsic::amdgcn_image_sample_c: - case Intrinsic::amdgcn_image_sample_c_cl: - case Intrinsic::amdgcn_image_sample_c_d: - case Intrinsic::amdgcn_image_sample_c_d_cl: - case Intrinsic::amdgcn_image_sample_c_l: - case Intrinsic::amdgcn_image_sample_c_b: - case Intrinsic::amdgcn_image_sample_c_b_cl: - case Intrinsic::amdgcn_image_sample_c_lz: - case Intrinsic::amdgcn_image_sample_c_cd: - case Intrinsic::amdgcn_image_sample_c_cd_cl: - - case Intrinsic::amdgcn_image_sample_o: - case Intrinsic::amdgcn_image_sample_cl_o: - case Intrinsic::amdgcn_image_sample_d_o: - case Intrinsic::amdgcn_image_sample_d_cl_o: - case Intrinsic::amdgcn_image_sample_l_o: - case Intrinsic::amdgcn_image_sample_b_o: - case Intrinsic::amdgcn_image_sample_b_cl_o: - case Intrinsic::amdgcn_image_sample_lz_o: - case Intrinsic::amdgcn_image_sample_cd_o: - case Intrinsic::amdgcn_image_sample_cd_cl_o: - - case Intrinsic::amdgcn_image_sample_c_o: - case Intrinsic::amdgcn_image_sample_c_cl_o: - case Intrinsic::amdgcn_image_sample_c_d_o: - case Intrinsic::amdgcn_image_sample_c_d_cl_o: - case Intrinsic::amdgcn_image_sample_c_l_o: - case Intrinsic::amdgcn_image_sample_c_b_o: - case Intrinsic::amdgcn_image_sample_c_b_cl_o: - case Intrinsic::amdgcn_image_sample_c_lz_o: - case Intrinsic::amdgcn_image_sample_c_cd_o: - case Intrinsic::amdgcn_image_sample_c_cd_cl_o: - - case Intrinsic::amdgcn_image_getlod: { - auto IID = II->getIntrinsicID(); - bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load || - IID == Intrinsic::amdgcn_buffer_load_format; - return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, - IsBuffer ? -1 : 3); - } + return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts); default: { if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0); Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll @@ -1,66 +0,0 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; -; This testcase used to cause the following crash: -; -; *** Couldn't join subrange! -; -; UNREACHABLE executed at lib/CodeGen/RegisterCoalescer.cpp:2666! -; -; The insertelement instructions became subregister definitions: one virtual -; register was defined and re-defined by one group of the consecutive insert- -; elements, and another was defined by the second group. -; Since a copy between the two full registers was present in the program, -; the coalescer tried to merge them. The join algorithm for the main range -; decided that it was correct to do so, while the subrange join unexpectedly -; failed. This was caused by the live interval subranges not being computed -; correctly: subregister defs are not uses for the purpose of subranges. -; -; Test for a valid output: -; CHECK: image_sample_c_d_o -define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 { -main_body: - %i.i = extractelement <2 x i32> %arg8, i32 0 - %j.i = extractelement <2 x i32> %arg8, i32 1 - %i.f.i = bitcast i32 %i.i to float - %j.f.i = bitcast i32 %j.i to float - %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 0, i32 %arg6) #1 - %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 0, i32 %arg6) #1 - %tmp23 = fadd float %p2.i, 0xBFA99999A0000000 - %tmp24 = fadd float %p2.i, 0x3FA99999A0000000 - %tmp25 = bitcast float %tmp23 to i32 - %tmp26 = insertelement <16 x i32> , i32 %tmp25, i32 1 - %tmp27 = insertelement <16 x i32> %tmp26, i32 undef, i32 2 - %tmp28 = insertelement <16 x i32> %tmp27, i32 undef, i32 3 - %tmp29 = insertelement <16 x i32> %tmp28, i32 undef, i32 4 - %tmp30 = insertelement <16 x i32> %tmp29, i32 0, i32 5 - %tmp31 = insertelement <16 x i32> %tmp30, i32 undef, i32 6 - %tmp32 = insertelement <16 x i32> %tmp31, i32 undef, i32 7 - %tmp33 = insertelement <16 x i32> %tmp32, i32 undef, i32 8 - %tmp33.bc = bitcast <16 x i32> %tmp33 to <16 x float> - %tmp34 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp33.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) - %tmp35 = extractelement <4 x float> %tmp34, i32 0 - %tmp36 = bitcast float %tmp24 to i32 - %tmp37 = insertelement <16 x i32> , i32 %tmp36, i32 1 - %tmp38 = insertelement <16 x i32> %tmp37, i32 undef, i32 2 - %tmp39 = insertelement <16 x i32> %tmp38, i32 undef, i32 3 - %tmp40 = insertelement <16 x i32> %tmp39, i32 undef, i32 4 - %tmp41 = insertelement <16 x i32> %tmp40, i32 0, i32 5 - %tmp42 = insertelement <16 x i32> %tmp41, i32 undef, i32 6 - %tmp43 = insertelement <16 x i32> %tmp42, i32 undef, i32 7 - %tmp44 = insertelement <16 x i32> %tmp43, i32 undef, i32 8 - %tmp44.bc = bitcast <16 x i32> %tmp44 to <16 x float> - %tmp45 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp44.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true) - %tmp46 = extractelement <4 x float> %tmp45, i32 0 - %tmp47 = fmul float %tmp35, %tmp46 - %tmp48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, float %tmp47, 14 - %tmp49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp48, float %arg21, 24 - ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp49 -} - -declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 -declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind readonly } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,130 +0,0 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI -;RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI - -;CHECK-LABEL: {{^}}image_atomic_swap: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_swap(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_swap_v2i32: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x02,0x00,0x00] -;VI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x02,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_swap_v2i32(<8 x i32> inreg, <2 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32 %2, <2 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_swap_i32: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x01,0x00,0x00] -;VI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x01,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_swap_i32(<8 x i32> inreg, i32, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %2, i32 %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_cmpswap: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x40,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: v_mov_b32_e32 v0, v4 -define amdgpu_ps float @image_atomic_cmpswap(<8 x i32> inreg, <4 x i32>, i32, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32 %2, i32 %3, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_add: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_add(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_sub: -;CHECK-NOT: s_waitcnt -;SI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00] -;VI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4c,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_sub(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %orig.f = bitcast i32 %orig to float - ret float %orig.f -} - -;CHECK-LABEL: {{^}}image_atomic_unchanged: -;CHECK-NOT: s_waitcnt -;CHECK: image_atomic_smin v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x50,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_umin v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x54,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_smax v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x58,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_umax v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x5c,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_and v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x60,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_or v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x64,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_xor v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x68,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_inc v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x6c,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -;CHECK: image_atomic_dec v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x70,0xf0,0x00,0x04,0x00,0x00] -;CHECK: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_atomic_unchanged(<8 x i32> inreg, <4 x i32>, i32) { -main_body: - %t0 = call i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t1 = call i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32 %t0, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t2 = call i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32 %t1, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t3 = call i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32 %t2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t4 = call i32 @llvm.amdgcn.image.atomic.and.v4i32(i32 %t3, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t5 = call i32 @llvm.amdgcn.image.atomic.or.v4i32(i32 %t4, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t6 = call i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32 %t5, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t7 = call i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32 %t6, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %t8 = call i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32 %t7, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0) - %out = bitcast i32 %t8 to float - ret float %out -} - -declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32, <2 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 - -declare i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32, i32, <4 x i32>, <8 x i32>,i1, i1, i1) #0 - -declare i32 @llvm.amdgcn.image.atomic.add.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.and.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.or.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 -declare i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0 - -attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll @@ -1,123 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s - -; GCN-LABEL: {{^}}image_load_f16 -; GCN: image_load v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16 -define half @image_load_f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret half %tex -} - -; GCN-LABEL: {{^}}image_load_v2f16: -; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] - -; PACKED: image_load v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] -define half @image_load_v2f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <2 x half> %tex, i32 1 - ret half %elt -} - -; GCN-LABEL: {{^}}image_load_v4f16: -; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] - -; PACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] -define half @image_load_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <4 x half> %tex, i32 3 - ret half %elt -} - -; GCN-LABEL: {{^}}image_load_mip_v4f16: -; UNPACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] - -; PACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] -define half @image_load_mip_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <4 x half> %tex, i32 3 - ret half %elt -} - -; GCN-LABEL: {{^}}image_store_f16 -; GCN: s_load_dword s[[LO:[0-9]+]], -; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[LO]] -; GCN: image_store v[[V_LO]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16 -define amdgpu_kernel void @image_store_f16(half %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; FIXME: Eliminate and to get low bits -; GCN-LABEL: {{^}}image_store_v2f16: -; UNPACKED: s_load_dword [[DATA:s[0-9]+]] -; UNPACKED-DAG: s_lshr_b32 [[UNPACK_1:s[0-9]+]], [[DATA]], 16 -; UNPACKED-DAG: s_and_b32 [[UNPACK_0:s[0-9]+]], [[DATA]], 0xffff -; UNPACKED-DAG: v_mov_b32_e32 v[[V_UNPACK_0:[0-9]+]], [[UNPACK_0]] -; UNPACKED-DAG: v_mov_b32_e32 v[[V_UNPACK_1:[0-9]+]], [[UNPACK_1]] -; UNPACKED: image_store v{{\[}}[[V_UNPACK_0]]:[[V_UNPACK_1]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 - -; PACKED: image_store v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 -define amdgpu_kernel void @image_store_v2f16(<2 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_v4f16: -; UNPACKED: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[LO]], 16 -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[HI]], 16 -; UNPACKED: s_and_b32 -; UNPACKED: s_and_b32 -; UNPACKED: image_store v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 - -; PACKED: s_load_dwordx2 s{{\[}}[[DATA0:[0-9]+]]:[[DATA1:[0-9]+]]{{\]}} -; PACKED: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[DATA0]] -; PACKED: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[DATA1]] -; PACKED: image_store v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -define amdgpu_kernel void @image_store_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_mip_v4f16: -; UNPACKED: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[LO]], 16 -; UNPACKED-DAG: s_lshr_b32 s{{[0-9]+}}, s[[HI]], 16 -; UNPACKED: s_and_b32 -; UNPACKED: s_and_b32 -; UNPACKED: image_store_mip v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 - -; PACKED: s_load_dwordx2 s{{\[}}[[DATA0:[0-9]+]]:[[DATA1:[0-9]+]]{{\]}} -; PACKED: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[DATA0]] -; PACKED: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[DATA1]] -; PACKED: image_store_mip v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 -define amdgpu_kernel void @image_store_mip_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { -main_body: - call void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - - -declare half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) - -declare void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) -declare void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll @@ -1,102 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s - - -; GCN-LABEL: {{^}}image_gather4_v4f16: -; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_cl_v4f16: -; UNPACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_c_v4f16: -; UNPACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_o_v4f16: -; UNPACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_c_o_v4f16: -; UNPACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_gather4_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -declare <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) - - -declare <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll @@ -1,362 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}gather4_v2: -; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_v2(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4: -; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_cl: -; GCN: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_l: -; GCN: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b: -; GCN: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_cl: -; GCN: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_cl_v8: -; GCN: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_cl_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_lz_v2: -; GCN: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_lz_v2(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_lz: -; GCN: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - - - -; GCN-LABEL: {{^}}gather4_o: -; GCN: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_cl_o: -; GCN: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_cl_o_v8: -; GCN: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_cl_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_l_o: -; GCN: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_l_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_l_o_v8: -; GCN: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_l_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_o: -; GCN: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_o_v8: -; GCN: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_b_cl_o: -; GCN: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_b_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_lz_o: -; GCN: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_lz_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - - -; GCN-LABEL: {{^}}gather4_c: -; GCN: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_cl: -; GCN: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_cl_v8: -; GCN: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_cl_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_l: -; GCN: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_l_v8: -; GCN: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_l_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b: -; GCN: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_v8: -; GCN: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_cl: -; GCN: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_lz: -; GCN: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - - -; GCN-LABEL: {{^}}gather4_c_o: -; GCN: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_o_v8: -; GCN: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_cl_o: -; GCN: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_l_o: -; GCN: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_l_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_o: -; GCN: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_b_cl_o: -; GCN: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_b_cl_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_lz_o: -; GCN: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_lz_o(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_c_lz_o_v8: -; GCN: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_c_lz_o_v8(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.gather4.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -attributes #0 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll @@ -1,53 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}getlod: -; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da -; GCN: s_waitcnt vmcnt(0) -; GCN: store_dwordx4 -define amdgpu_kernel void @getlod(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}getlod_v2: -; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da -; GCN: s_waitcnt vmcnt(0) -; GCN: store_dwordx4 -define amdgpu_kernel void @getlod_v2(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}getlod_v4: -; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da -; GCN: s_waitcnt vmcnt(0) -; GCN: store_dwordx4 -define amdgpu_kernel void @getlod_v4(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_getlod_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_getlod_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - - -attributes #0 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -1,210 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VERDE %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s - -; GCN-LABEL: {{^}}image_load_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_v2i32: -; GCN-NOT: s_waitcnt -; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_i32: -; GCN-NOT: s_waitcnt -; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_mip: -; GCN-NOT: s_waitcnt -; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret <4 x float> %tex -} - -; GCN-LABEL: {{^}}image_load_1: -; GCN-NOT: s_waitcnt -; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %elt = extractelement <4 x float> %tex, i32 0 - ret float %elt -} - -; GCN-LABEL: {{^}}image_load_f32_v2i32: -; GCN-NOT: s_waitcnt -; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 { -main_body: - %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret float %tex -} - -; GCN-LABEL: {{^}}image_load_v2f32_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm -; GCN: s_waitcnt vmcnt(0) -define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 { -main_body: - %tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - ret <2 x float> %tex -} - -; GCN-LABEL: {{^}}image_store_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_v2i32: -; GCN-NOT: s_waitcnt -; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_i32: -; GCN-NOT: s_waitcnt -; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_f32_i32: -; GCN-NOT: s_waitcnt -; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm -define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_v2f32_v4i32: -; GCN-NOT: s_waitcnt -; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm -define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}image_store_mip: -; GCN-NOT: s_waitcnt -; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm -define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 { -main_body: - call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; GCN-LABEL: {{^}}getresinfo: -; GCN-NOT: s_waitcnt -; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -; GCN: s_waitcnt vmcnt(0) -; GCN: exp -define amdgpu_ps void @getresinfo() #0 { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false) - %r0 = extractelement <4 x float> %r, i32 0 - %r1 = extractelement <4 x float> %r, i32 1 - %r2 = extractelement <4 x float> %r, i32 2 - %r3 = extractelement <4 x float> %r, i32 3 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0 - ret void -} - -; GCN-LABEL: {{^}}getresinfo_dmask0: -; GCN-NOT: image_get_resinfo -define amdgpu_ps void @getresinfo_dmask0() #0 { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false) - %r0 = extractelement <4 x float> %r, i32 0 - %r1 = extractelement <4 x float> %r, i32 1 - %r2 = extractelement <4 x float> %r, i32 2 - %r3 = extractelement <4 x float> %r, i32 3 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0 - ret void -} - -; Ideally, the register allocator would avoid the wait here -; -; GCN-LABEL: {{^}}image_store_wait: -; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; VERDE: s_waitcnt expcnt(0) -; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) -; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm -define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 { -main_body: - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false) - %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false) - call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false) - ret void -} - -; SI won't merge ds memory operations, because of the signed offset bug, so -; we only have check lines for VI. -; VI-LABEL: image_load_mmo -; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4 -define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 { -bb: - store float 0.000000e+00, float addrspace(3)* %lds - %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) - %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4 - store float 0.000000e+00, float addrspace(3)* %tmp2 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0 - ret void -} - -declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 - - -declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2 - -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly } -attributes #2 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll @@ -1,137 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s - - -; GCN-LABEL: {{^}}image_sample_f16: -; GCN: image_sample v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16 - -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off -define amdgpu_kernel void @image_sample_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) - store half %tex, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_v2f16: -; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 - -; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off -define amdgpu_kernel void @image_sample_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <2 x half> %tex, i32 1 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_v4f16: -; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_cl_v4f16: -; UNPACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_c_v4f16: -; UNPACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_o_v4f16: -; UNPACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_sample_c_o_v4f16: -; UNPACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 - -; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off -define amdgpu_kernel void @image_sample_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <4 x half> %tex, i32 3 - store half %elt, half addrspace(1)* %out - ret void -} - -declare half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) - - -declare <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll @@ -1,435 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}sample: -; GCN: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cl: -; GCN: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d: -; GCN: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d_cl: -; GCN: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_l: -; GCN: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b: -; GCN: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b_cl: -; GCN: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_lz: -; GCN: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd: -; GCN: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd_cl: -; GCN: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c: -; GCN: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cl: -; GCN: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d: -; GCN: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d_cl: -; GCN: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_l: -; GCN: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b: -; GCN: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b_cl: -; GCN: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_lz: -; GCN: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd: -; GCN: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd_cl: -; GCN: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_f32: -; GCN: image_sample {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 -define amdgpu_kernel void @sample_f32(float addrspace(1)* %out) { -main_body: - %r = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) - store float %r, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_v2f32: -; GCN: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 -define amdgpu_kernel void @sample_v2f32(<2 x float> addrspace(1)* %out) { -main_body: - %r = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) - store <2 x float> %r, <2 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_0: -; GCN: image_sample v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1{{$}} -define amdgpu_kernel void @adjust_writemask_sample_0(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_01: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x3{{$}} -define amdgpu_kernel void @adjust_writemask_sample_01(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - %elt1 = extractelement <4 x float> %r, i32 1 - store volatile float %elt0, float addrspace(1)* %out - store volatile float %elt1, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_012: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x7{{$}} -define amdgpu_kernel void @adjust_writemask_sample_012(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - %elt1 = extractelement <4 x float> %r, i32 1 - %elt2 = extractelement <4 x float> %r, i32 2 - store volatile float %elt0, float addrspace(1)* %out - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt2, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_12: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x6{{$}} -define amdgpu_kernel void @adjust_writemask_sample_12(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt1 = extractelement <4 x float> %r, i32 1 - %elt2 = extractelement <4 x float> %r, i32 2 - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt2, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_03: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x9{{$}} -define amdgpu_kernel void @adjust_writemask_sample_03(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt0 = extractelement <4 x float> %r, i32 0 - %elt3 = extractelement <4 x float> %r, i32 3 - store volatile float %elt0, float addrspace(1)* %out - store volatile float %elt3, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_13: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xa{{$}} -define amdgpu_kernel void @adjust_writemask_sample_13(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt1 = extractelement <4 x float> %r, i32 1 - %elt3 = extractelement <4 x float> %r, i32 3 - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt3, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_123: -; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xe{{$}} -define amdgpu_kernel void @adjust_writemask_sample_123(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt1 = extractelement <4 x float> %r, i32 1 - %elt2 = extractelement <4 x float> %r, i32 2 - %elt3 = extractelement <4 x float> %r, i32 3 - store volatile float %elt1, float addrspace(1)* %out - store volatile float %elt2, float addrspace(1)* %out - store volatile float %elt3, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_variable_dmask_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_variable_dmask_enabled(float addrspace(1)* %out, i32 %dmask) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - - -; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_l_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_l_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_lz_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_lz_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_cl_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_cl_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -attributes #0 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll @@ -1,427 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s - -; GCN-LABEL: {{^}}sample: -; GCN: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cl: -; GCN: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d: -; GCN: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_d_cl: -; GCN: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_l: -; GCN: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b: -; GCN: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_b_cl: -; GCN: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_lz: -; GCN: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd: -; GCN: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_cd_cl: -; GCN: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c: -; GCN: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cl: -; GCN: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d: -; GCN: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_d_cl: -; GCN: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_d_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_l: -; GCN: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_l(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b: -; GCN: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_b_cl: -; GCN: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_b_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_lz: -; GCN: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_lz(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd: -; GCN: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}sample_c_cd_cl: -; GCN: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf -define amdgpu_kernel void @sample_c_cd_cl(<4 x float> addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) - store <4 x float> %r, <4 x float> addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_d_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_d_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_l_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_l_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_b_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_b_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_lz_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_lz_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_cd_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_cd_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_d_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_d_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_d_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_d_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_l_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_l_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_b_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_b_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_b_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_b_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_lz_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_lz_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_cd_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_cd_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}adjust_writemask_sample_c_cd_cl_o_none_enabled: -; GCN-NOT: image -; GCN-NOT: store -define amdgpu_kernel void @adjust_writemask_sample_c_cd_cl_o_none_enabled(float addrspace(1)* %out) { -main_body: - %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false) - %elt0 = extractelement <4 x float> %r, i32 0 - store float %elt0, float addrspace(1)* %out - ret void -} - -declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - -declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - - -attributes #0 = { nounwind readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll +++ llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll @@ -1,59 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s - -; This shader has the potential to generated illegal VGPR to SGPR copies if -; the wrong register class is used for the REG_SEQUENCE instructions. - -; GCN-LABEL: {{^}}main: -; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf -define amdgpu_ps void @main(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 { -main_body: - %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0 - %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0 - %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16) - %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0 - %tmp23 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp22, !tbaa !0 - %tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0 - %tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp24, !tbaa !0 - %i.i = extractelement <2 x i32> %arg5, i32 0 - %j.i = extractelement <2 x i32> %arg5, i32 1 - %i.f.i = bitcast i32 %i.i to float - %j.f.i = bitcast i32 %j.i to float - %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0 - %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0 - %i.i1 = extractelement <2 x i32> %arg5, i32 0 - %j.i2 = extractelement <2 x i32> %arg5, i32 1 - %i.f.i3 = bitcast i32 %i.i1 to float - %j.f.i4 = bitcast i32 %j.i2 to float - %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg3) #0 - %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg3) #0 - %tmp28 = bitcast float %tmp21 to i32 - %tmp29 = bitcast float %p2.i to i32 - %tmp30 = bitcast float %p2.i6 to i32 - %tmp31 = insertelement <4 x i32> undef, i32 %tmp28, i32 0 - %tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1 - %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2 - %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3 - %tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float> - %tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) - %tmp36 = extractelement <4 x float> %tmp35, i32 0 - %tmp37 = extractelement <4 x float> %tmp35, i32 1 - %tmp38 = extractelement <4 x float> %tmp35, i32 2 - %tmp39 = extractelement <4 x float> %tmp35, i32 3 - call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0 - ret void -} - -declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1 -declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1 -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 -declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind readonly } - -!0 = !{!1, !1, i64 0, i32 1} -!1 = !{!"const", !2} -!2 = !{!"tbaa root"}