diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -508,12 +508,14 @@

 // Represent texture/image types / dimensionality.
 class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
-                     list<string> coord_names, list<string> slice_names> {
+                     list<string> coord_names, list<string> slice_names,
+                     bit msaa = 0> {
   AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
   string Name = name; // e.g. "2darraymsaa"
   string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
   bits<3> Encoding = enc;
   bit DA = 0; // DA bit in MIMG encoding
+  bit MSAA = msaa;

   list<AMDGPUArg> CoordSliceArgs =
     makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
@@ -536,9 +538,9 @@
   def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
   def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
 }
-def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
+def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"], 1>;
 let DA = 1 in {
-  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
+  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"], 1>;
 }

 def AMDGPUDims {
@@ -798,10 +800,15 @@
                               "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
                               [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;

-  defm int_amdgcn_image_msaa_load_x
-    : AMDGPUImageDimIntrinsicsAll<"MSAA_LOAD_X", [llvm_any_ty], [], [IntrReadMem],
-                                  [SDNPMemOperand]>,
-      AMDGPUImageDMaskIntrinsic;
+  //////////////////////////////////////////////////////////////////////////
+  // MSAA intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  foreach dim = AMDGPUDims.Msaa in {
+    def int_amdgcn_image_msaa_load_x # _ # dim.Name:
+        AMDGPUImageDimIntrinsic<
+            AMDGPUDimNoSampleProfile<"MSAA_LOAD_X", dim, [llvm_any_ty], []>,
+            [IntrReadMem], [SDNPMemOperand]>;
+  }

   //////////////////////////////////////////////////////////////////////////
   // sample and getlod intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1545,6 +1545,7 @@
   bool validateMIMGAddrSize(const MCInst &Inst);
   bool validateMIMGD16(const MCInst &Inst);
   bool validateMIMGDim(const MCInst &Inst);
+  bool validateMIMGMSAA(const MCInst &Inst);
   bool validateLdsDirect(const MCInst &Inst);
   bool validateOpSel(const MCInst &Inst);
   bool validateVccOperand(unsigned Reg) const;
@@ -3492,6 +3493,29 @@
   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
 }

+bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
+  if (!BaseOpcode->MSAA)
+    return true;
+
+  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  assert(DimIdx != -1);
+
+  unsigned Dim = Inst.getOperand(DimIdx).getImm();
+  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+
+  return DimInfo->MSAA;
+}
+
 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
 {
   switch (Opcode) {
@@ -4128,6 +4152,10 @@
Error(IDLoc, "dim modifier is required on this GPU"); return false; } + if (!validateMIMGMSAA(Inst)) { + Error(IDLoc, "invalid dim; must be MSAA type"); + return false; + } if (!validateMIMGDataSize(Inst)) { Error(IDLoc, "image data size does not match dmask and tfe"); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -42,6 +42,7 @@ bit LodOrClampOrMip = 0; bit HasD16 = 0; bit IsAtomicRet = 0; + bit MSAA = 0; } def MIMGBaseOpcode : GenericEnum { @@ -53,7 +54,7 @@ let CppTypeName = "MIMGBaseOpcodeInfo"; let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates", - "LodOrClampOrMip", "HasD16"]; + "LodOrClampOrMip", "HasD16", "MSAA"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; let PrimaryKey = ["BaseOpcode"]; @@ -67,7 +68,7 @@ def MIMGDimInfoTable : GenericTable { let FilterClass = "AMDGPUDimProps"; let CppTypeName = "MIMGDimInfo"; - let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"]; + let Fields = ["Dim", "NumCoords", "NumGradients", "MSAA", "DA", "Encoding", "AsmSuffix"]; string TypeOf_Dim = "MIMGDim"; let PrimaryKey = ["Dim"]; @@ -364,6 +365,7 @@ let Coordinates = !not(isResInfo); let LodOrClampOrMip = mip; let HasD16 = has_d16; + let MSAA = msaa; } let BaseOpcode = !cast(NAME), diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -284,6 +284,7 @@ bool Coordinates; bool LodOrClampOrMip; bool HasD16; + bool MSAA; }; LLVM_READONLY @@ -293,6 +294,7 @@ MIMGDim Dim; uint8_t NumCoords; uint8_t NumGradients; + bool MSAA; bool DA; uint8_t Encoding; const char *AsmSuffix; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll @@ -1,111 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; GCN-LABEL: {{^}}load_1d: -; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; -define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}load_1d_tfe: -; GFX10: image_msaa_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; -define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { -main_body: - %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) - %v.vec = extractvalue {<4 x float>, i32} %v, 0 - %v.err = extractvalue {<4 x float>, i32} %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 - ret <4 x float> %v.vec -} - -; GCN-LABEL: {{^}}load_1d_lwe: -; GFX10: image_msaa_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; -define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { -main_body: - %v = call {<4 x float>, i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, 
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_2d:
-; GFX10: image_msaa_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
-define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_2d_tfe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
-define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_3d:
-; GFX10: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
-define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_3d_tfe_lwe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ;
-define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_1darray:
-; GFX10: image_msaa_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
-define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_1darray_tfe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ;
-define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_2darray:
-; GFX10: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
-define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_2darray_lwe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ;
-define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
 ; GCN-LABEL: {{^}}load_2dmsaa:
 ; GFX10: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
@@ -144,110 +38,98 @@
   ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask3:
-; GFX10: image_msaa_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask3:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 7, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
   %v.err = extractvalue {<4 x float>, i32} %v, 1
   store i32 %v.err, i32 addrspace(1)* %out, align 4
   ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask2:
-; GFX10: image_msaa_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask2:
+; GFX10: image_msaa_load v[0:2], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 6, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
   %v.err = extractvalue {<4 x float>, i32} %v, 1
   store i32 %v.err, i32 addrspace(1)* %out, align 4
   ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask1:
-; GFX10: image_msaa_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask1:
+; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
   %v.err = extractvalue {<4 x float>, i32} %v, 1
   store i32 %v.err, i32 addrspace(1)* %out, align 4
   ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V2_dmask1:
-; GFX10: image_msaa_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V2_dmask1:
+; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <2 x float> @load_2dmsaa_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<2 x float>, i32} %v, 0
   %v.err = extractvalue {<2 x float>, i32} %v, 1
   store i32 %v.err, i32 addrspace(1)* %out, align 4
   ret <2 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_V1:
-; GFX10: image_msaa_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ;
-define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_V1:
+; GFX10: image_msaa_load v0, v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm ;
+define amdgpu_ps float @load_2dmsaa_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call float @llvm.amdgcn.image.msaa.load.x.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %v = call float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
   ret float %v
 }

-; GCN-LABEL: {{^}}load_1d_V2:
-; GFX10: image_msaa_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ;
-define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_V2:
+; GFX10: image_msaa_load v[0:1], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_2D_MSAA unorm ;
+define amdgpu_ps <2 x float> @load_2dmsaa_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <2 x float> @llvm.amdgcn.image.msaa.load.x.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %v = call <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
   ret <2 x float> %v
 }

-; GCN-LABEL: {{^}}load_1d_glc:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
-define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_glc:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm glc ;
+define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 1)
   ret <4 x float> %v
 }

-; GCN-LABEL: {{^}}load_1d_slc:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
-define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_slc:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm slc ;
+define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 2)
   ret <4 x float> %v
 }

-; GCN-LABEL: {{^}}load_1d_glc_slc:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
-define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_glc_slc:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc ;
+define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 3)
   ret <4 x float> %v
 }

-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.msaa.load.x.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.msaa.load.x.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.msaa.load.x.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.msaa.load.x.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1

 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s
--- a/llvm/test/MC/AMDGPU/gfx1030_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_err.s
@@ -143,3 +143,9 @@

 global_atomic_csub v2, v[0:1], v2, off offset:100 slc
 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc
+
+image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type
+
+image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type
diff --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s
--- a/llvm/test/MC/AMDGPU/gfx1030_new.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_new.s
@@ -105,17 +105,17 @@
 image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19], s[12:15] a16
 // GFX10: encoding: [0x05,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13]

-image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
-// GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]
+image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA
+// GFX10: encoding: [0x31,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]

-image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc
-// GFX10: encoding: [0x01,0x2f,0x00,0xf0,0x05,0x01,0x02,0x00]
+image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA glc
+// GFX10: encoding: [0x31,0x2f,0x00,0xf0,0x05,0x01,0x02,0x00]

-image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
-// GFX10: encoding: [0x09,0x01,0x00,0xf0,0x01,0x05,0x02,0x80]
+image_msaa_load v5, v[1:3], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA d16
+// GFX10: encoding: [0x31,0x01,0x00,0xf0,0x01,0x05,0x02,0x80]

-image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
-// GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]
+image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+// GFX10: encoding: [0x39,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]

 image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
 // GFX10: encoding: [0x3b,0x01,0x00,0xf0,0xcc,0x0e,0x0a,0x00,0x0b,0x0e,0x13,0x00]
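Usage note (not part of the patch): with the generic llvm.amdgcn.image.msaa.load.x intrinsic replaced by per-dimension MSAA-only variants, front ends are expected to emit only the 2dmsaa/2darraymsaa forms. A minimal IR sketch, using the operand order from the declares above (dmask, coordinates/fragid, rsrc, texfailctrl, cachepolicy); the function name @sample_msaa_array is hypothetical:

define amdgpu_ps <4 x float> @sample_msaa_array(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  ; dmask = 0xf, no tfe/lwe, default cache policy
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32)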