diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -182,6 +182,23 @@ } } + // Optimize _offset away when 'offset' is zero + if (const auto *OffsetMappingInfo = + AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantOffset = + dyn_cast(II.getOperand(ImageDimIntr->OffsetIndex))) { + if (ConstantOffset->isZero()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode( + OffsetMappingInfo->NoOffset, ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->OffsetIndex); + }); + } + } + } + // Try to use A16 or G16 if (!ST->hasA16() && !ST->hasG16()) return None; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -147,6 +147,22 @@ let PrimaryKeyName = "getMIMGBiasMappingInfo"; } +class MIMGOffsetMapping { + MIMGBaseOpcode Offset = offset; + MIMGBaseOpcode NoOffset = nooffset; +} + +def MIMGOffsetMappingTable : GenericTable { + let FilterClass = "MIMGOffsetMapping"; + let CppTypeName = "MIMGOffsetMappingInfo"; + let Fields = ["Offset", "NoOffset"]; + string TypeOf_Offset = "MIMGBaseOpcode"; + string TypeOf_NoOffset = "MIMGBaseOpcode"; + + let PrimaryKey = ["Offset"]; + let PrimaryKeyName = "getMIMGOffsetMappingInfo"; +} + class MIMGG16Mapping { MIMGBaseOpcode G = g; MIMGBaseOpcode G16 = g16; @@ -1174,6 +1190,48 @@ def : MIMGBiasMapping; def : MIMGBiasMapping; +// Offset to NoOffset Optimization Mapping +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; +def : MIMGOffsetMapping; + // G to G16 Optimization Mapping def : MIMGG16Mapping; def : MIMGG16Mapping; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -336,6 +336,11 @@ MIMGBaseOpcode NoBias; }; +struct MIMGOffsetMappingInfo { + MIMGBaseOpcode Offset; + MIMGBaseOpcode NoOffset; +}; + struct MIMGG16MappingInfo { MIMGBaseOpcode G; MIMGBaseOpcode G16; @@ -350,6 +355,9 @@ LLVM_READONLY const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); +LLVM_READONLY +const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset); + LLVM_READONLY const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -133,6 +133,7 @@ #define GET_MIMGLZMappingTable_IMPL #define GET_MIMGMIPMappingTable_IMPL #define GET_MIMGBiasMappingTable_IMPL +#define GET_MIMGOffsetMappingTable_IMPL #define GET_MIMGG16MappingTable_IMPL #include "AMDGPUGenSearchableTables.inc" diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -4630,6 +4630,571 @@ ret void } +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample offset zero +; -------------------------------------------------------------------- + +define amdgpu_kernel void @offset_sample_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; CHECK-LABEL: @offset_sample_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; CHECK-LABEL: @offset_sample_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; CHECK-LABEL: @offset_sample_c_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; CHECK-LABEL: @offset_sample_c_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_c_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_c_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { +; CHECK-LABEL: @offset_sample_b_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +; CHECK-LABEL: @offset_sample_b_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { +; CHECK-LABEL: @offset_sample_c_b_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { +; CHECK-LABEL: @offset_sample_c_b_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_b_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_b_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_c_b_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_c_b_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +; CHECK-LABEL: @offset_sample_d_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; CHECK-LABEL: @offset_sample_d_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +; CHECK-LABEL: @offset_sample_c_d_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; CHECK-LABEL: @offset_sample_c_d_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_d_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_d_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_c_d_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_c_d_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +; CHECK-LABEL: @offset_sample_cd_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; CHECK-LABEL: @offset_sample_cd_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +; CHECK-LABEL: @offset_sample_c_cd_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +; CHECK-LABEL: @offset_sample_c_cd_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_cd_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_cd_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { +; CHECK-LABEL: @offset_sample_l_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +; CHECK-LABEL: @offset_sample_l_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { +; CHECK-LABEL: @offset_sample_c_l_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +; CHECK-LABEL: @offset_sample_c_l_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; CHECK-LABEL: @offset_sample_lz_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; CHECK-LABEL: @offset_sample_lz_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; CHECK-LABEL: @offset_sample_c_lz_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @offset_sample_c_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; CHECK-LABEL: @offset_sample_c_lz_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + ; -------------------------------------------------------------------- ; llvm.amdgcn.is.shared ; --------------------------------------------------------------------