Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -195,14 +195,12 @@
   [llvm_anyint_ty,   // vaddr(VGPR)
    llvm_v8i32_ty,    // rsrc(SGPR)
    llvm_i32_ty,      // dmask(imm)
-   llvm_i1_ty,       // r128(imm)
-   llvm_i1_ty,       // da(imm)
-   llvm_i1_ty,       // glc(imm)
-   llvm_i1_ty],      // slc(imm)
+   llvm_i32_ty],     // flags(imm)
   [IntrReadMem]>;
 
 def int_amdgcn_image_load : AMDGPUImageLoad;
 def int_amdgcn_image_load_mip : AMDGPUImageLoad;
+def int_amdgcn_getresinfo : AMDGPUImageLoad;
 
 class AMDGPUImageStore : Intrinsic <
   [],
@@ -210,15 +208,106 @@
   llvm_anyint_ty,    // vaddr(VGPR)
    llvm_v8i32_ty,    // rsrc(SGPR)
    llvm_i32_ty,      // dmask(imm)
-   llvm_i1_ty,       // r128(imm)
-   llvm_i1_ty,       // da(imm)
-   llvm_i1_ty,       // glc(imm)
-   llvm_i1_ty],      // slc(imm)
+   llvm_i32_ty],     // flags(imm)
   []>;
 
 def int_amdgcn_image_store : AMDGPUImageStore;
 def int_amdgcn_image_store_mip : AMDGPUImageStore;
+
+class AMDGPUSampleRaw : Intrinsic <
+  [llvm_v4f32_ty],   // vdata(VGPR)
+  [llvm_anyint_ty,   // vaddr(VGPR)
+   llvm_v8i32_ty,    // rsrc(SGPR)
+   llvm_v4i32_ty,    // sampler(SGPR)
+   llvm_i32_ty,      // dmask(imm)
+   llvm_i32_ty],     // flags(imm)
+  [IntrNoMem]>;
+
+// Basic sample
+def int_amdgcn_image_sample : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_cl : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_d : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_d_cl : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_l : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_b : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_b_cl : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_lz : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_cd : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_cd_cl : AMDGPUSampleRaw;
+
+// Sample with comparison
+def int_amdgcn_image_sample_c : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_cl : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_d : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_d_cl : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_l : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_b : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_b_cl : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_lz : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_cd : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_cd_cl : AMDGPUSampleRaw;
+
+// Sample with offsets
+def int_amdgcn_image_sample_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_d_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_d_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_l_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_b_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_b_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_lz_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_cd_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_cd_cl_o : AMDGPUSampleRaw;
+
+// Sample with comparison and offsets
+def int_amdgcn_image_sample_c_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_d_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_d_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_l_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_b_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_b_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_lz_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_cd_o : AMDGPUSampleRaw;
+def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUSampleRaw;
+
+// Basic gather4
+def int_amdgcn_gather4 : AMDGPUSampleRaw;
+def int_amdgcn_gather4_cl : AMDGPUSampleRaw;
+def int_amdgcn_gather4_l : AMDGPUSampleRaw;
+def int_amdgcn_gather4_b : AMDGPUSampleRaw;
+def int_amdgcn_gather4_b_cl : AMDGPUSampleRaw;
+def int_amdgcn_gather4_lz : AMDGPUSampleRaw;
+
+// Gather4 with comparison
+def int_amdgcn_gather4_c : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_cl : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_l : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_b : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_b_cl : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_lz : AMDGPUSampleRaw;
+
+// Gather4 with offsets
+def int_amdgcn_gather4_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_l_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_b_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_b_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_lz_o : AMDGPUSampleRaw;
+
+// Gather4 with comparison and offsets
+def int_amdgcn_gather4_c_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_l_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_b_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_b_cl_o : AMDGPUSampleRaw;
+def int_amdgcn_gather4_c_lz_o : AMDGPUSampleRaw;
+
+def int_amdgcn_getlod : AMDGPUSampleRaw;
+
+
 class AMDGPUImageAtomic : Intrinsic <
   [llvm_i32_ty],
   [llvm_i32_ty, // vdata(VGPR)
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -153,6 +153,15 @@
   void SelectBRCOND(SDNode *N);
   void SelectATOMIC_CMP_SWAP(SDNode *N);
 
+  bool SelectImageFlagBits(SDValue ArgNode, SDValue &unorm,
+                           SDValue &glc,
+                           SDValue &slc,
+                           SDValue &r128,
+                           SDValue &tfe,
+                           SDValue &lwe,
+                           SDValue &da);
+
+
   // Include the pieces autogenerated from the target description.
 #include "AMDGPUGenDAGISel.inc"
 };
@@ -1075,6 +1084,61 @@
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectImageFlagBits(SDValue ArgNode,
+                                             SDValue &unorm,
+                                             SDValue &glc,
+                                             SDValue &slc,
+                                             SDValue &r128,
+                                             SDValue &tfe,
+                                             SDValue &lwe,
+                                             SDValue &da) {
+  // Future flags would be unpacked here as well:
+  // SDValue &d16, SDValue &rsvd
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ArgNode);
+  if (!C)
+    return false;
+
+  SDLoc SL(ArgNode);
+  int64_t ArgImm = C->getZExtValue();
+  int UNORMBit = 0, GLCBit = 0, SLCBit = 0, R128Bit = 0,
+      TFEBit = 0, LWEBit = 0, DABit = 0;
+
+  if (ArgImm & llvm::MIMGFlags::UNORM)
+    UNORMBit = 1;
+  if (ArgImm & llvm::MIMGFlags::GLC)
+    GLCBit = 1;
+  if (ArgImm & llvm::MIMGFlags::SLC)
+    SLCBit = 1;
+  if (ArgImm & llvm::MIMGFlags::R128)
+    R128Bit = 1;
+  if (ArgImm & llvm::MIMGFlags::TFE)
+    TFEBit = 1;
+  if (ArgImm & llvm::MIMGFlags::LWE)
+    LWEBit = 1;
+  if (ArgImm & llvm::MIMGFlags::DA)
+    DABit = 1;
+  /* Not selected yet:
+  if (ArgImm & llvm::MIMGFlags::D16)
+    D16Bit = 1;
+  if (ArgImm & llvm::MIMGFlags::RSVD)
+    RSVDBit = 1;
+  */
+
+  unorm = CurDAG->getTargetConstant(UNORMBit, SL, MVT::i1);
+  glc = CurDAG->getTargetConstant(GLCBit, SL, MVT::i1);
+  slc = CurDAG->getTargetConstant(SLCBit, SL, MVT::i1);
+  r128 = CurDAG->getTargetConstant(R128Bit, SL, MVT::i1);
+
+  tfe = CurDAG->getTargetConstant(TFEBit, SL, MVT::i1);
+  lwe = CurDAG->getTargetConstant(LWEBit, SL, MVT::i1);
+  da = CurDAG->getTargetConstant(DABit, SL, MVT::i1);
+  // d16 = CurDAG->getTargetConstant(D16Bit, SL, MVT::i1);
+  // rsvd = CurDAG->getTargetConstant(RSVDBit, SL, MVT::i1);
+
+  return true;
+}
+
+
 ///
 /// \param EncodedOffset This is the immediate value that will be encoded
 /// directly into the instruction. On SI/CI the \p EncodedOffset
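
A note on the new flags operand (illustration only, not part of the patch): the single
packed i32 "flags" immediate replaces the old i1 r128/da/glc/slc operands on the image,
sample, and gather intrinsics, and SelectImageFlagBits above unpacks it into the
individual MIMG operand immediates. The standalone C++ sketch below shows how a frontend
might build that word; the constants simply mirror the MIMGFlags enum this patch adds to
SIDefines.h, and the resulting values match the "i32 1" (unorm) and "i32 64" (da)
immediates used in the updated tests. The helper name buildImageFlags is hypothetical.

#include <cassert>
#include <cstdint>

// Mirror of the MIMGFlags bit layout added to SIDefines.h below.
enum : uint32_t {
  UNORM = 1u << 0,
  GLC   = 1u << 1,
  SLC   = 1u << 2,
  R128  = 1u << 3,
  TFE   = 1u << 4,
  LWE   = 1u << 5,
  DA    = 1u << 6,
  D16   = 1u << 7,  // defined but not yet selected
  RSVD  = 1u << 8   // defined but not yet selected
};

// Hypothetical frontend helper: OR together the bits that apply.
static uint32_t buildImageFlags(bool Unorm, bool Glc, bool Slc, bool R128Bit,
                                bool Tfe, bool Lwe, bool Da) {
  uint32_t Flags = 0;
  if (Unorm)   Flags |= UNORM;
  if (Glc)     Flags |= GLC;
  if (Slc)     Flags |= SLC;
  if (R128Bit) Flags |= R128;
  if (Tfe)     Flags |= TFE;
  if (Lwe)     Flags |= LWE;
  if (Da)      Flags |= DA;
  return Flags;
}

int main() {
  // The llvm.amdgcn.image.load/store tests pass i32 1: unorm only.
  assert(buildImageFlags(true, false, false, false, false, false, false) == 1u);
  // The gather4/getlod tests pass i32 64: da only.
  assert(buildImageFlags(false, false, false, false, false, false, true) == 64u);
  return 0;
}
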
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -105,6 +105,22 @@
 }
 
 namespace llvm {
+namespace MIMGFlags {
+enum {
+  UNORM = 1 << 0,
+  GLC   = 1 << 1,
+  SLC   = 1 << 2,
+  R128  = 1 << 3,
+  TFE   = 1 << 4,
+  LWE   = 1 << 5,
+  DA    = 1 << 6,
+  D16   = 1 << 7,
+  RSVD  = 1 << 8
+};
+}
+}
+
+namespace llvm {
 namespace AMDGPU {
 namespace EncValues { // Encoding values of enum9/8/7 operands
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -633,6 +633,9 @@
 def VOP3Mods : ComplexPattern;
 def VOP3NoMods : ComplexPattern;
 
+def ImageFlagParameters : ComplexPattern<untyped, 7, "SelectImageFlagBits">;
+
+
 //===----------------------------------------------------------------------===//
 // SI assembler operands
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -2536,6 +2536,25 @@
   def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
 }
 
+
+// Image + sampler
+class AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, (ImageFlagParameters
+      i1:$unorm, i1:$glc, i1:$slc, i1:$r128, i1:$tfe, i1:$lwe, i1:$da)),
+  (opcode $addr, $rsrc, $sampler,
+    (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+    (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
+>;
+
+multiclass AMDGCNSamplePatterns<SDPatternOperator name, string opcode> {
+  def : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+  def : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+  def : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+  def : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
+  def : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
+}
+
+
 // Image only
 class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
   (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm,
@@ -2552,11 +2571,11 @@
 }
 
 class ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
-  (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$r128, imm:$da, imm:$glc,
-        imm:$slc),
+  (name vt:$addr, v8i32:$rsrc, imm:$dmask, (ImageFlagParameters i1:$unorm, i1:$glc,
+      i1:$slc, i1:$r128, i1:$tfe, i1:$lwe, i1:$da)),
   (opcode $addr, $rsrc,
-    (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-    (as_i1imm $r128), 0, 0, (as_i1imm $da))
+    (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+    (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
 >;
 
 multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
@@ -2566,11 +2585,11 @@
 }
 
 class ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
-  (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, imm:$r128, imm:$da,
-        imm:$glc, imm:$slc),
+  (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, (ImageFlagParameters i1:$unorm,
+      i1:$glc, i1:$slc, i1:$r128, i1:$tfe, i1:$lwe, i1:$da)),
   (opcode $data, $addr, $rsrc,
-    (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-    (as_i1imm $r128), 0, 0, (as_i1imm $da))
+    (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+    (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
 >;
 
 multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
@@ -2693,6 +2712,7 @@
 def : SampleRawPattern;
 def : ImagePattern;
+def : ImageLoadPattern;
 defm : ImagePatterns;
 defm : ImagePatterns;
 defm : ImageLoadPatterns;
@@ -2715,6 +2735,102 @@
 defm : ImageAtomicPatterns;
 defm : ImageAtomicPatterns;
 
+//========================
update for amdgcn ================================= +// Basic sample +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; + +// Sample with comparison +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; + +// Sample with offsets +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; + +// Sample with comparison and offsets +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; +defm : AMDGCNSamplePatterns; + +// Gather opcodes +// Only the variants which make sense are defined. +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; + +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; + +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; + +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; + +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; +def : AMDGCNSamplePattern; + + + /* SIsample for simple 1D texture lookup */ def : Pat < (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm), Index: test/CodeGen/AMDGPU/llvm.amdgcn.gather4.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.gather4.ll @@ -0,0 +1,506 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: {{^}}gather4_v2: +;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_v2() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, 
float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4: +;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_cl: +;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_cl() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_l: +;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_l() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_b: +;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_b() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_b_cl: +;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_b_cl() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_b_cl_v8: +;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_b_cl_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = 
extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_lz_v2: +;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_lz_v2() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_lz: +;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_lz() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: {{^}}gather4_o: +;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_cl_o: +;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_cl_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_cl_o_v8: +;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_cl_o_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_l_o: +;CHECK: image_gather4_l_o 
{{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_l_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_l_o_v8: +;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_l_o_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_b_o: +;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_b_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_b_o_v8: +;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_b_o_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_b_cl_o: +;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_b_cl_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_lz_o: +;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_lz_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> 
%r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + +;CHECK-LABEL: {{^}}gather4_c: +;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_cl: +;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_cl() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_cl_v8: +;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_cl_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_l: +;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_l() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_l_v8: +;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_l_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.l.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_b: +;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_b() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_b_v8: +;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_b_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.b.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_b_cl: +;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_b_cl() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_lz: +;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_lz() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: {{^}}gather4_c_o: +;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_o_v8: +;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_o_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = 
extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_cl_o: +;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_cl_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_l_o: +;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_l_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_b_o: +;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_b_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_b_cl_o: +;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_b_cl_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_lz_o: +;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_lz_o() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: {{^}}gather4_c_lz_o_v8: +;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da +define void @gather4_c_lz_o_v8() { +main_body: + %r = call <4 x float> @llvm.amdgcn.gather4.c.lz.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + +declare <4 x float> @llvm.amdgcn.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 + +declare <4 x float> @llvm.amdgcn.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 + +declare <4 x float> @llvm.amdgcn.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 + +declare <4 x float> @llvm.amdgcn.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) 
#0 +declare <4 x float> @llvm.amdgcn.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.getload.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.getload.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: {{^}}getlod: +;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da +define void @getlod() { +main_body: + %r = call <4 x float> @llvm.amdgcn.getlod.i32(i32 undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 64) ; flag = da + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: {{^}}getlod_v2: +;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da +define void @getlod_v2() { +main_body: + %r = call <4 x float> @llvm.amdgcn.getlod.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 64) ; flag = da + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: {{^}}getlod_v4: +;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da +define void @getlod_v4() { +main_body: + %r = call <4 x float> @llvm.amdgcn.getlod.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 64) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + + +declare <4 x float> @llvm.amdgcn.getlod.i32(i32, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.getlod.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.getlod.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -1,12 +1,13 @@ ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + ;CHECK-LABEL: {{^}}image_load_v4i32: ;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> 
%c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret <4 x float> %tex } @@ -15,7 +16,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret <4 x float> %tex } @@ -24,7 +25,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret <4 x float> %tex } @@ -33,7 +34,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret <4 x float> %tex } @@ -42,7 +43,7 @@ ;CHECK: s_waitcnt vmcnt(0) define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) { main_body: - %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm %elt = extractelement <4 x float> %tex, i32 0 ; Only first component used, test that dmask etc. 
is changed accordingly ret float %elt @@ -52,7 +53,7 @@ ;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) { main_body: - call void @llvm.amdgcn.image.store.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret void } @@ -60,7 +61,7 @@ ;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) { main_body: - call void @llvm.amdgcn.image.store.v2i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.v2i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret void } @@ -68,7 +69,7 @@ ;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) { main_body: - call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret void } @@ -76,7 +77,7 @@ ;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) { main_body: - call void @llvm.amdgcn.image.store.mip.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.mip.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i32 1) ; 1<<0 unorm ret void } @@ -90,21 +91,38 @@ ;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) { main_body: - call void @llvm.amdgcn.image.store.i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0) - %data = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0) - call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0) + call void @llvm.amdgcn.image.store.i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i32 1) ; 1<<0 unorm + %data = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %4, <8 x i32> %1, i32 15, i32 1) ; 1<<0 unorm + call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i32 1) ; 1<<0 unorm ret void } -declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 -declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0 +;CHECK-LABEL: {{^}}getresinfo: +;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf +define amdgpu_ps void @getresinfo() { +main_body: + %r = call <4 x float> @llvm.amdgcn.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = 
extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + +declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i32) #0 + +declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.getresinfo.i32(i32, <8 x i32>, i32, i32) +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { nounwind } attributes #1 = { nounwind readonly } Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample-masked.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample-masked.ll @@ -0,0 +1,94 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s + +; CHECK-LABEL: {{^}}v1: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xd +define amdgpu_ps void @v1(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 2 + %4 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: {{^}}v2: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xb +define amdgpu_ps void @v2(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 1 + %4 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: {{^}}v3: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xe +define amdgpu_ps void @v3(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 1 + %3 = extractelement <4 x float> %1, i32 2 + %4 = extractelement <4 x float> %1, i32 3 + call void 
@llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: {{^}}v4: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x7 +define amdgpu_ps void @v4(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 1 + %4 = extractelement <4 x float> %1, i32 2 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: {{^}}v5: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xa +define amdgpu_ps void @v5(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 1 + %3 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3) + ret void +} + +; CHECK-LABEL: {{^}}v6: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x6 +define amdgpu_ps void @v6(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 1 + %3 = extractelement <4 x float> %1, i32 2 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3) + ret void +} + +; CHECK-LABEL: {{^}}v7: +; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x9 +define amdgpu_ps void @v7(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.sample.v1i32(<1 x i32>, <8 x i32>, <4 x i32>, i32, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll @@ -0,0 +1,289 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: {{^}}sample: +;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf +define void @sample() { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, 
float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_cl:
+;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_d:
+;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_d() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_d_cl:
+;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_d_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_l:
+;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_l() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_b:
+;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_b() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_b_cl:
+;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_b_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_lz:
+;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_lz() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_cd:
+;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_cd() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_cd_cl:
+;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_cd_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c:
+;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_cl:
+;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_d:
+;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_d() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_d_cl:
+;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_d_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_l:
+;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_l() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_b:
+;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_b() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_b_cl:
+;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_b_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_lz:
+;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_lz() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_cd:
+;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_cd() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_cd_cl:
+;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_cd_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+
+declare <4 x float> @llvm.amdgcn.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll
@@ -0,0 +1,289 @@
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+
+;CHECK-LABEL: {{^}}sample:
+;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_cl:
+;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_d:
+;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_d() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_d_cl:
+;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_d_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_l:
+;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_l() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_b:
+;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_b() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_b_cl:
+;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_b_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_lz:
+;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_lz() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_cd:
+;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_cd() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_cd_cl:
+;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_cd_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c:
+;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_cl:
+;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_d:
+;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_d() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_d_cl:
+;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_d_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_l:
+;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_l() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_b:
+;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_b() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_b_cl:
+;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_b_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_lz:
+;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_lz() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_cd:
+;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_cd() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+;CHECK-LABEL: {{^}}sample_c_cd_cl:
+;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define void @sample_c_cd_cl() {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ ret void
+}
+
+
+declare <4 x float> @llvm.amdgcn.image.sample.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #0
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -7,9 +7,9 @@
 ; CHECK-NEXT: image_store
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) {
- call void @llvm.amdgcn.image.store.i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
+ call void @llvm.amdgcn.image.store.i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i32 3) ; (1<<0 | 1<<1) unorm + glc
 call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
- call void @llvm.amdgcn.image.store.i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
+ call void @llvm.amdgcn.image.store.i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i32 3) ; (1<<0 | 1<<1) unorm + glc
 ret void
 }
@@ -22,17 +22,17 @@
 ; CHECK: s_waitcnt
 ; CHECK-NEXT: image_store
 define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
- %t = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %t = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i32 1) ; unorm
 call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
 %c.1 = mul i32 %c, 2
- call void @llvm.amdgcn.image.store.i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i32 1) ; unorm
 ret void
 }
 declare void @llvm.amdgcn.s.waitcnt(i32) #0
-declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
+declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i32) #1
+declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i32) #0
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
Index: test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- test/CodeGen/AMDGPU/wqm.ll
+++ test/CodeGen/AMDGPU/wqm.ll
@@ -7,8 +7,8 @@
 ;CHECK-NOT: s_wqm
 define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
 main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
- call void @llvm.amdgcn.image.store.v4i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i32 1) ; flag = unorm
+ call void @llvm.amdgcn.image.store.v4i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i32 1) ; flag = unorm
 ret <4 x float> %tex
 }
@@ -22,7 +22,7 @@
 ;CHECK: _load_dword v0,
 define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
 main_body:
- %c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %c.1 = call <4 x float> @llvm.amdgcn.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0) ; flags = 0
 %c.2 = bitcast <4 x float> %c.1 to <4 x i32>
 %c.3 = extractelement <4 x i32> %c.2, i32 0
 %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
@@ -43,7 +43,7 @@
 ;CHECK: .size test3
 define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
 main_body:
- %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %tex.1 = bitcast <4 x float> %tex to <4 x i32>
 %tex.2 = extractelement <4 x i32> %tex.1, i32 0
 %gep = getelementptr float, float addrspace(1)* %ptr, i32 %tex.2
@@ -68,7 +68,7 @@
 %c.1 = mul i32 %c, %d
 %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.1
 store float %data, float addrspace(1)* %gep
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %c.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 ret <4 x float> %tex
 }
@@ -95,7 +95,7 @@
 br i1 %cmp, label %IF, label %ELSE
 IF:
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %data.if = extractelement <4 x float> %tex, i32 0
 br label %END
@@ -135,7 +135,7 @@
 br i1 %cmp, label %ELSE, label %IF
 IF:
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %data.if = extractelement <4 x float> %tex, i32 0
 br label %END
@@ -194,7 +194,7 @@
 END:
 %coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ]
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord.END, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %coord.END, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 ret <4 x float> %tex
 }
@@ -212,7 +212,7 @@
 ;CHECK: v_cmp
 define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <3 x i32> %idx, <2 x float> %data, i32 %coord) {
 main_body:
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %tex.1 = extractelement <4 x float> %tex, i32 0
 %idx.1 = extractelement <3 x i32> %idx, i32 0
@@ -270,7 +270,7 @@
 br label %END
 END:
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 ret <4 x float> %tex
 }
@@ -293,7 +293,7 @@
 ;CHECK: image_sample
 define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, i32 %coord, i32 %coord2, float %z) {
 main_body:
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %idx.0 = extractelement <2 x i32> %idx, i32 0
 %gep.0 = getelementptr float, float addrspace(1)* %ptr, i32 %idx.0
@@ -307,7 +307,7 @@
 %data.1 = extractelement <2 x float> %data, i32 1
 store float %data.1, float addrspace(1)* %gep.1
- %tex2 = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex2 = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %out = fadd <4 x float> %tex, %tex2
 ret <4 x float> %out
@@ -327,7 +327,7 @@
 ; CHECK: v_cmpx_
 define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) {
 main_body:
- %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0)
 %gep = getelementptr float, float addrspace(1)* %ptr, i32 %idx
 store float %data, float addrspace(1)* %gep
@@ -350,12 +350,12 @@
 ret float %s
 }
-declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
+declare <4 x float> @llvm.amdgcn.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32) #3
+declare <4 x float> @llvm.amdgcn.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32) #3
 declare void @llvm.AMDGPU.kill(float)
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
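; Illustrative note (editor's sketch, not part of the patch itself): the tests above
; fold the former boolean operands into the new single i32 flags operand. Going by the
; inline comments in the llvm.amdgcn.s.waitcnt.ll and wqm.ll hunks, bit 0 is unorm and
; bit 1 is glc, so i32 1 = unorm and i32 3 = unorm|glc. A hypothetical unorm+glc load
; under that encoding would look like:
;   %t = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i32 3) ; unorm|glc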