diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -744,8 +744,8 @@ list AllNumAddrWords = !foreach(dw, !if(sample.Gradients, !if(!eq(sample.LodOrClamp, ""), - [2, 3, 4, 5, 6, 7, 9], - [2, 3, 4, 5, 7, 8, 10]), + [2, 3, 4, 5, 6, 7, 8, 9], + [2, 3, 4, 5, 6, 7, 8, 9, 10]), !if(!eq(sample.LodOrClamp, ""), [1, 2, 3], [1, 2, 3, 4])), diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6085,56 +6085,73 @@ MVT VAddrVT = Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType(); MVT VAddrScalarVT = VAddrVT.getScalarType(); - MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16; + MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16; IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16; VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType(); VAddrScalarVT = VAddrVT.getScalarType(); + MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16; IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16; - if (IsA16 || IsG16) { - if (IsA16) { - if (!ST->hasA16()) { - LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not " - "support 16 bit addresses\n"); - return Op; - } - if (!IsG16) { - LLVM_DEBUG( - dbgs() << "Failed to lower image intrinsic: 16 bit addresses " - "need 16 bit derivatives but got 32 bit derivatives\n"); - return Op; - } - } else if (!ST->hasG16()) { - LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not " - "support 16 bit derivatives\n"); + + bool IsTiedG16 = false; + if (BaseOpcode->Gradients && !ST->hasG16()) { + // 16 bit gradients are supported, but are tied to the A16 control + if (IsA16 && !IsG16) { + LLVM_DEBUG( + dbgs() << "Failed to lower image intrinsic: 16 bit addresses " + "need 16 bit derivatives but got 32 bit derivatives\n"); return Op; } + if (!IsA16 && IsG16) { + LLVM_DEBUG( + dbgs() + << "Failed to lower image intrinsic: 16 bit addresses " + "need 16 bit addresses for 16 bit derivatives (got 32 bit)"); + return Op; + } + if (IsA16 && IsG16) + IsTiedG16 = true; + } - if (BaseOpcode->Gradients && !IsA16) { - if (!ST->hasG16()) { - LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not " - "support 16 bit derivatives\n"); - return Op; - } - // Activate g16 - const AMDGPU::MIMGG16MappingInfo *G16MappingInfo = - AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode); - IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16 + if (IsA16) { + if (!ST->hasA16()) { + LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not " + "support 16 bit addresses\n"); + return Op; } + } - // Don't compress addresses for G16 - const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart); - packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, - ArgOffset + Intr->GradientStart, PackEndIdx, - Intr->NumGradients); + // We've dealt with incorrect input so we know that if IsA16, IsG16 or + // IsTiedG16 are set then we have to compress/pack operands (either address, + // gradient or both) + if (BaseOpcode->Gradients && IsG16 && !IsTiedG16) { + // Activate g16 + const AMDGPU::MIMGG16MappingInfo *G16MappingInfo = + AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode); + IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16 + } - if (!IsA16) { - // Add uncompressed address - for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++) - VAddrs.push_back(Op.getOperand(I)); - } + // Add gradients (packed or unpacked) + if (IsG16) { + // Pack the gradients + // const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart); + packImageA16AddressToDwords( + DAG, Op, GradPackVectorVT, VAddrs, ArgOffset + Intr->GradientStart, + ArgOffset + Intr->CoordStart, Intr->NumGradients); + } else { + for (unsigned I = ArgOffset + Intr->GradientStart; + I < ArgOffset + Intr->CoordStart; I++) + VAddrs.push_back(Op.getOperand(I)); + } + + // Add addresses (packed or unpacked) + if (IsA16) { + packImageA16AddressToDwords(DAG, Op, AddrPackVectorVT, VAddrs, + ArgOffset + Intr->CoordStart, VAddrEnd, + 0 /* No gradients */); } else { - for (unsigned I = ArgOffset + Intr->GradientStart; I < VAddrEnd; I++) + // Add uncompressed address + for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++) VAddrs.push_back(Op.getOperand(I)); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll @@ -542,7 +542,7 @@ ; ; GFX10-LABEL: sample_d_1d: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -573,7 +573,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 -; GFX10-NEXT: image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -607,7 +607,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: image_sample_d v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 +; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -624,7 +624,7 @@ ; ; GFX10-LABEL: sample_c_d_1d: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -657,7 +657,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: image_sample_c_d v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -678,7 +678,7 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 -; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -709,7 +709,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -730,7 +730,7 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 -; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -763,7 +763,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -780,7 +780,7 @@ ; ; GFX10-LABEL: sample_cd_1d: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -811,7 +811,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0 -; GFX10-NEXT: image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -828,7 +828,7 @@ ; ; GFX10-LABEL: sample_c_cd_1d: ; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -861,7 +861,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: image_sample_c_cd v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -882,7 +882,7 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 -; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -913,7 +913,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -934,7 +934,7 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 -; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -967,7 +967,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -1163,7 +1163,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 -; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -1197,7 +1197,7 @@ ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 -; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll @@ -0,0 +1,555 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s +; Depends on some other changes to pass this test - those are in review separately +; XFAIL: * + +define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) { +; GFX10-LABEL: sample_d_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { +; GFX10-LABEL: sample_d_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) { +; GFX10-LABEL: sample_d_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 +; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) { +; GFX10-LABEL: sample_c_d_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { +; GFX10-LABEL: sample_c_d_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 +; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { +; GFX10-LABEL: sample_d_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { +; GFX10-LABEL: sample_d_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { +; GFX10-LABEL: sample_c_d_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { +; GFX10-LABEL: sample_c_d_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 +; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) { +; GFX10-LABEL: sample_cd_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { +; GFX10-LABEL: sample_cd_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) { +; GFX10-LABEL: sample_c_cd_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { +; GFX10-LABEL: sample_c_cd_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 +; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { +; GFX10-LABEL: sample_cd_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { +; GFX10-LABEL: sample_cd_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { +; GFX10-LABEL: sample_c_cd_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { +; GFX10-LABEL: sample_c_cd_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 +; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { +; GFX10-LABEL: sample_c_d_o_2darray_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 +; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret float %v +} + +define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { +; GFX10-LABEL: sample_c_d_o_2darray_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 +; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <2 x float> %v +} + +declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { +; GFX10-LABEL: sample_g16_noa16_d_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +; GFX10-LABEL: sample_g16_noa16_d_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff +; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 +; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { +; GFX10-LABEL: sample_g16_noa16_d_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff +; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 +; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { +; GFX10-LABEL: sample_g16_noa16_c_d_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +; GFX10-LABEL: sample_g16_noa16_c_d_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 +; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_d_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_d_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff +; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 +; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { +; GFX10-LABEL: sample_g16_noa16_cd_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +; GFX10-LABEL: sample_g16_noa16_cd_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff +; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 +; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { +; GFX10-LABEL: sample_g16_noa16_c_cd_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +; GFX10-LABEL: sample_g16_noa16_c_cd_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 +; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_cd_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_cd_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff +; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 +; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { +; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff +; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 +; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret float %v +} + +define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { +; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff +; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 +; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +main_body: + %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <2 x float> %v +} + +declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone }