diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4825,18 +4825,25 @@ const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16; const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64; - const unsigned NumVAddrs = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); - const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize(); - const unsigned Opcodes[2][2][2] = { - {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa}, - {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa}}, - {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa}, - {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa}}}; - const unsigned Opcode = Opcodes[UseNSA][IsA16][Is64]; + const unsigned NumVDataDwords = 4; + const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); + const bool UseNSA = + ST.hasNSAEncoding() && NumVAddrDwords <= ST.getNSAMaxSize(); + const unsigned BaseOpcodes[2][2] = { + {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16}, + {AMDGPU::IMAGE_BVH64_INTERSECT_RAY, + AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}}; + int Opcode; + if (UseNSA) { + Opcode = + AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10NSA, + NumVDataDwords, NumVAddrDwords); + } else { + Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], + AMDGPU::MIMGEncGfx10Default, NumVDataDwords, + PowerOf2Ceil(NumVAddrDwords)); + } + assert(Opcode != -1); SmallVector Ops; if (Is64) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -693,22 +693,21 @@ int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::d16); + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + assert(VDataIdx != -1); - if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray + if (BaseOpcode->BVH) { + // Add A16 operand for intersect_ray instructions if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) { - assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa || - MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa || - MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa || - MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa); addOperand(MI, MCOperand::createImm(1)); } return MCDisassembler::Success; } - const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); bool IsAtomic = (VDstIdx != -1); bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; - bool IsNSA = false; unsigned AddrSize = Info->VAddrDwords; @@ -717,8 +716,6 @@ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); int A16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16); - const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = - AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); const AMDGPU::MIMGDimInfo *Dim = AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm()); const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm()); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -43,6 +43,7 @@ bit HasD16 = 0; bit IsAtomicRet = 0; bit MSAA = 0; + bit BVH = 0; } def MIMGBaseOpcode : GenericEnum { @@ -54,7 +55,7 @@ let CppTypeName = "MIMGBaseOpcodeInfo"; let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates", - "LodOrClampOrMip", "HasD16", "MSAA"]; + "LodOrClampOrMip", "HasD16", "MSAA", "BVH"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; let PrimaryKey = ["BaseOpcode"]; @@ -872,6 +873,14 @@ multiclass MIMG_Gather_WQM : MIMG_Gather; +class MIMG_IntersectRay_Helper { + int num_addrs = !if(Is64, !if(A16, 9, 12), !if(A16, 8, 11)); + // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple, + // when we only need 9, 11 or 12 depending on A16 field and ptr size. + RegisterClass RegClass = MIMGAddrSize.RegClass; + int VAddrDwords = !srl(RegClass.Size, 5); +} + class MIMG_IntersectRay_gfx10 : MIMG_gfx10 { @@ -890,8 +899,11 @@ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", ""); } -multiclass MIMG_IntersectRay { - def "" : MIMGBaseOpcode; +multiclass MIMG_IntersectRay { + defvar info = MIMG_IntersectRay_Helper; + def "" : MIMGBaseOpcode { + let BVH = 1; + } let SubtargetPredicate = HasGFX10_AEncoding, AssemblerPredicate = HasGFX10_AEncoding, AsmMatchConverter = !if(A16, "cvtIntersectRay", ""), @@ -908,13 +920,11 @@ d16 = 0, BaseOpcode = !cast(NAME), VDataDwords = 4 in { - // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple, - // when we only need 9, 11 or 12 depending on A16 field and ptr size. - def "_sa" : MIMG_IntersectRay_gfx10.RegClass, A16> { - let VAddrDwords = !srl(MIMGAddrSize.RegClass.Size, 5); + def _sa_gfx10 : MIMG_IntersectRay_gfx10 { + let VAddrDwords = info.VAddrDwords; } - def _nsa : MIMG_IntersectRay_nsa_gfx10 { - let VAddrDwords = num_addrs; + def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10 { + let VAddrDwords = info.num_addrs; } } } @@ -1045,10 +1055,10 @@ let SubtargetPredicate = HasGFX10_AEncoding in defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler , "image_msaa_load", 1, 0, 0, 1>; -defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 11, 0>; -defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh_intersect_ray", 8, 1>; -defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh64_intersect_ray", 12, 0>; -defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh64_intersect_ray", 9, 1>; +defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 0>; +defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh_intersect_ray", 0, 1>; +defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 0>; +defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay, "image_bvh64_intersect_ray", 1, 1>; /********** ========================================= **********/ /********** Table of dimension-aware image intrinsics **********/ diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7373,19 +7373,25 @@ const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16; const bool Is64 = NodePtr.getValueType() == MVT::i64; - const unsigned NumVAddrs = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); - const bool UseNSA = - Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize(); - const unsigned Opcodes[2][2][2] = { - {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa}, - {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa}}, - {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa}, - {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa, - AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa}}}; - const unsigned Opcode = Opcodes[UseNSA][IsA16][Is64]; + const unsigned NumVDataDwords = 4; + const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11); + const bool UseNSA = Subtarget->hasNSAEncoding() && + NumVAddrDwords <= Subtarget->getNSAMaxSize(); + const unsigned BaseOpcodes[2][2] = { + {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16}, + {AMDGPU::IMAGE_BVH64_INTERSECT_RAY, + AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}}; + int Opcode; + if (UseNSA) { + Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], + AMDGPU::MIMGEncGfx10NSA, NumVDataDwords, + NumVAddrDwords); + } else { + Opcode = AMDGPU::getMIMGOpcode( + BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10Default, NumVDataDwords, + PowerOf2Ceil(NumVAddrDwords)); + } + assert(Opcode != -1); SmallVector Ops; @@ -7428,7 +7434,7 @@ if (!UseNSA) { // Build a single vector containing all the operands so far prepared. - if (NumVAddrs > 8) { + if (NumVAddrDwords > 8) { SDValue Undef = DAG.getUNDEF(MVT::i32); Ops.append(16 - Ops.size(), Undef); } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -343,6 +343,9 @@ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1) return UNKNOWN; + // Ignore BVH instructions + if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) + return UNKNOWN; // TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD. if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc)) @@ -380,15 +383,6 @@ case AMDGPU::DS_WRITE_B64: case AMDGPU::DS_WRITE_B64_gfx9: return DS_WRITE; - case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa: - case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa: - case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa: - case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa: - case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa: - case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa: - case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa: - case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa: - return UNKNOWN; } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -292,8 +292,12 @@ bool LodOrClampOrMip; bool HasD16; bool MSAA; + bool BVH; }; +LLVM_READONLY +const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); + LLVM_READONLY const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt @@ -7869,40 +7869,40 @@ # GFX90A: image_load a5, v2, s[8:15] dmask:0x2 ; encoding: [0x00,0x02,0x01,0xf0,0x02,0x05,0x02,0x00] 0x00,0x02,0x01,0xf0,0x02,0x05,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_load a5, v2, s[8:15] dmask:0x4 ; encoding: [0x00,0x04,0x01,0xf0,0x02,0x05,0x02,0x00] 0x00,0x04,0x01,0xf0,0x02,0x05,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0x6 ; encoding: [0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x6 ; encoding: [0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0x7 ; encoding: [0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0x7 ; encoding: [0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_load a5, v2, s[8:15] dmask:0x8 ; encoding: [0x00,0x08,0x01,0xf0,0x02,0x05,0x02,0x00] 0x00,0x08,0x01,0xf0,0x02,0x05,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0x9 ; encoding: [0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x9 ; encoding: [0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0xa ; encoding: [0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0xa ; encoding: [0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0xb ; encoding: [0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0xb ; encoding: [0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0xc ; encoding: [0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0xc ; encoding: [0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0xd ; encoding: [0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0xd ; encoding: [0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00 -# GFX90A: image_load a6, v2, s[8:15] dmask:0xe ; encoding: [0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0xe ; encoding: [0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00] 0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_load a5, v2, s[8:15] ; encoding: [0x00,0x00,0x01,0xf0,0x02,0x05,0x02,0x00] @@ -7944,43 +7944,43 @@ # GFX90A: image_store a1, v2, s[12:19] dmask:0x2 unorm ; encoding: [0x00,0x12,0x21,0xf0,0x02,0x01,0x03,0x00] 0x00,0x12,0x21,0xf0,0x02,0x01,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00 # GFX90A: image_store a1, v2, s[12:19] dmask:0x4 unorm ; encoding: [0x00,0x14,0x21,0xf0,0x02,0x01,0x03,0x00] 0x00,0x14,0x21,0xf0,0x02,0x01,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0x5 unorm ; encoding: [0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x5 unorm ; encoding: [0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0x6 unorm ; encoding: [0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x6 unorm ; encoding: [0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0x7 unorm ; encoding: [0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0x7 unorm ; encoding: [0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00 # GFX90A: image_store a1, v2, s[12:19] dmask:0x8 unorm ; encoding: [0x00,0x18,0x21,0xf0,0x02,0x01,0x03,0x00] 0x00,0x18,0x21,0xf0,0x02,0x01,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0x9 unorm ; encoding: [0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x9 unorm ; encoding: [0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0xa unorm ; encoding: [0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0xa unorm ; encoding: [0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0xb unorm ; encoding: [0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0xb unorm ; encoding: [0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0xc unorm ; encoding: [0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0xc unorm ; encoding: [0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0xd unorm ; encoding: [0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0xd unorm ; encoding: [0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0xe unorm ; encoding: [0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0xe unorm ; encoding: [0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00 -# GFX90A: image_store a2, v2, s[12:19] dmask:0xf unorm ; encoding: [0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00] +# GFX90A: image_store a[2:5], v2, s[12:19] dmask:0xf unorm ; encoding: [0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00] 0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00 # GFX90A: image_store a1, v2, s[12:19] unorm ; encoding: [0x00,0x10,0x21,0xf0,0x02,0x01,0x03,0x00] @@ -8016,7 +8016,7 @@ # GFX90A: image_atomic_swap a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x41,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_swap a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_swap a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_swap a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x41,0xf0,0x02,0x05,0x02,0x00] @@ -8046,7 +8046,7 @@ # GFX90A: image_atomic_cmpswap a[6:7], v2, s[92:99] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x17,0x00] 0x00,0x13,0x45,0xf0,0x02,0x06,0x17,0x00 -# GFX90A: image_atomic_cmpswap a[6:7], v2, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_cmpswap a[6:9], v2, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00] 0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_cmpswap a[6:7], v2, s[8:15] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x45,0xf0,0x02,0x06,0x02,0x00] @@ -8076,7 +8076,7 @@ # GFX90A: image_atomic_add a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x49,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_add a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_add a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_add a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x49,0xf0,0x02,0x05,0x02,0x00] @@ -8106,7 +8106,7 @@ # GFX90A: image_atomic_sub a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x4d,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_sub a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_sub a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_sub a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4d,0xf0,0x02,0x05,0x02,0x00] @@ -8136,7 +8136,7 @@ # GFX90A: image_atomic_smin a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x51,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_smin a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_smin a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_smin a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x51,0xf0,0x02,0x05,0x02,0x00] @@ -8166,7 +8166,7 @@ # GFX90A: image_atomic_umin a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x55,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_umin a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_umin a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_umin a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x55,0xf0,0x02,0x05,0x02,0x00] @@ -8196,7 +8196,7 @@ # GFX90A: image_atomic_smax a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x59,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_smax a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_smax a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_smax a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x59,0xf0,0x02,0x05,0x02,0x00] @@ -8226,7 +8226,7 @@ # GFX90A: image_atomic_umax a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x5d,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_umax a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_umax a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_umax a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x5d,0xf0,0x02,0x05,0x02,0x00] @@ -8256,7 +8256,7 @@ # GFX90A: image_atomic_and a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x61,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_and a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_and a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_and a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x61,0xf0,0x02,0x05,0x02,0x00] @@ -8286,7 +8286,7 @@ # GFX90A: image_atomic_or a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x65,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_or a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_or a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_or a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x65,0xf0,0x02,0x05,0x02,0x00] @@ -8316,7 +8316,7 @@ # GFX90A: image_atomic_xor a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x69,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_xor a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_xor a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_xor a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x69,0xf0,0x02,0x05,0x02,0x00] @@ -8346,7 +8346,7 @@ # GFX90A: image_atomic_inc a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x6d,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_inc a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_inc a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_inc a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x6d,0xf0,0x02,0x05,0x02,0x00] @@ -8376,7 +8376,7 @@ # GFX90A: image_atomic_dec a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x17,0x00] 0x00,0x11,0x71,0xf0,0x02,0x05,0x17,0x00 -# GFX90A: image_atomic_dec a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00] +# GFX90A: image_atomic_dec a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00] 0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00 # GFX90A: image_atomic_dec a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x71,0xf0,0x02,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt b/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt --- a/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt @@ -1,6 +1,6 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX90A -# GFX90A: image_load v4, v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00] +# GFX90A: image_load v[4:6], v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00] 0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00 # GFX90A: image_load_pck v5, v0, s[8:15] dmask:0x1 glc ; encoding: [0x00,0x21,0x08,0xf0,0x00,0x05,0x02,0x00] @@ -15,10 +15,10 @@ # GFX90A: image_load_mip_pck v5, v1, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x10,0xf0,0x01,0x05,0x02,0x00] 0x00,0x01,0x10,0xf0,0x01,0x05,0x02,0x00 -# GFX90A: image_load_mip_pck_sgn v4, v0, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00] +# GFX90A: image_load_mip_pck_sgn v[4:5], v0, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00] 0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00 -# GFX90A: image_store v192, v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00] +# GFX90A: image_store v[192:194], v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00] 0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00 # GFX90A: image_store_pck v1, v2, s[12:19] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x28,0xf0,0x02,0x01,0x03,0x00]