diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7208,6 +7208,16 @@
   unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   SDLoc DL(Op);
 
+  // Shared fallback for the `default:` label and for cases that only
+  // special-case some subtargets: lower image-dimension intrinsics through
+  // lowerImage(); anything else yields an empty SDValue.
+  auto LowerDefaultCase = [&]() -> SDValue {
+    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+            AMDGPU::getImageDimIntrinsicInfo(IntrID))
+      return lowerImage(Op, ImageDimIntr, DAG, true);
+    return SDValue();
+  };
+
   switch (IntrID) {
   case Intrinsic::amdgcn_ds_ordered_add:
   case Intrinsic::amdgcn_ds_ordered_swap: {
@@ -7826,13 +7836,24 @@
                                    M->getVTList(), Ops, M->getMemoryVT(),
                                    M->getMemOperand());
   }
+  case Intrinsic::amdgcn_image_sample_lz_2d: {
+    if (!AMDGPU::isGFX90A(*Subtarget))
+      return LowerDefaultCase();
+
+    // On GFX90A, rewrite `image_sample_lz_2d` as `image_sample_2d`: copy the
+    // operand list unchanged and swap only the intrinsic ID (operand 1;
+    // operand 0 is the chain). The rebuilt node keeps the original opcode,
+    // result types, memory VT and memory operand.
+    SmallVector<SDValue, 16> Ops(Op->op_begin(), Op->op_end());
+    Ops[1] = DAG.getConstant(Intrinsic::amdgcn_image_sample_2d, DL,
+                             Op.getOperand(1).getValueType());
+    const auto *MemNode = cast<MemSDNode>(Op);
+    return DAG.getMemIntrinsicNode(Op.getOpcode(), DL, Op->getVTList(), Ops,
+                                   MemNode->getMemoryVT(),
+                                   MemNode->getMemOperand());
+  }
   default:
-
-    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
-            AMDGPU::getImageDimIntrinsicInfo(IntrID))
-      return lowerImage(Op, ImageDimIntr, DAG, true);
-
-    return SDValue();
+    return LowerDefaultCase();
   }
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
@@ -66,9 +66,19 @@
   ret <4 x float> %v
 }
 
+; GFX90A-LABEL: {{^}}sample_lz_2d:
+; GFX90A-NOT: s_wqm_b64
+; GFX90A: image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)