Index: lib/Target/AMDGPU/MIMGInstructions.td
===================================================================
--- lib/Target/AMDGPU/MIMGInstructions.td
+++ lib/Target/AMDGPU/MIMGInstructions.td
@@ -101,6 +101,22 @@
   let PrimaryKeyName = "getMIMGLZMappingInfo";
 }
 
+class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> {
+  MIMGBaseOpcode MIP = mip;
+  MIMGBaseOpcode NONMIP = nonmip;
+}
+
+def MIMGMIPMappingTable : GenericTable {
+  let FilterClass = "MIMGMIPMapping";
+  let CppTypeName = "MIMGMIPMappingInfo";
+  let Fields = ["MIP", "NONMIP"];
+  GenericEnum TypeOf_MIP = MIMGBaseOpcode;
+  GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
+
+  let PrimaryKey = ["MIP"];
+  let PrimaryKeyName = "getMIMGMIPMappingInfo";
+}
+
 class MIMG
   : InstSI {
 
@@ -808,3 +824,7 @@
 def : MIMGLZMapping;
 def : MIMGLZMapping;
 def : MIMGLZMapping;
+
+// MIP to NONMIP optimization mapping: each _mip base opcode and its variant without _mip
+def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
+def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4863,6 +4863,8 @@
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
   const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
       AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+  const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
+      AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
   unsigned IntrOpcode = Intr->BaseOpcode;
   bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
 
@@ -4966,6 +4968,17 @@
     }
   }
 
+  // Optimize _mip away when the last address operand ('lod') is a constant zero
+  if (MIPMappingInfo) {
+    if (auto ConstantLod =
+         dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+      if (ConstantLod->isNullValue()) {
+        IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
+        NumMIVAddrs--;               // remove 'lod'
+      }
+    }
+  }
+
   // Check for 16 bit addresses and pack if true.
   unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
   MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -45,6 +45,7 @@
 #define GET_MIMGDim_DECL
 #define GET_MIMGEncoding_DECL
 #define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"
 
 namespace IsaInfo {
@@ -218,9 +219,17 @@
   MIMGBaseOpcode LZ;
 };
 
+struct MIMGMIPMappingInfo {
+  MIMGBaseOpcode MIP;
+  MIMGBaseOpcode NONMIP;
+};
+
 LLVM_READONLY
 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
 
+LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+
 LLVM_READONLY
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                   unsigned VDataDwords, unsigned VAddrDwords);
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -104,6 +104,7 @@
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
Index: test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
@@ -0,0 +1,132 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+
+; GCN-LABEL: {{^}}load_mip_1d:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2d:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_3d:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_1darray:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2darray:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_cube:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+
+
+; GCN-LABEL: {{^}}store_mip_1d:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2d:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_3d:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_1darray:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2darray:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_cube:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+