diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1632,7 +1632,7 @@ bool validateMIMGGatherDMask(const MCInst &Inst); bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); - bool validateMIMGAddrSize(const MCInst &Inst); + bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); bool validateMIMGD16(const MCInst &Inst); bool validateMIMGMSAA(const MCInst &Inst); bool validateOpSel(const MCInst &Inst); @@ -1742,8 +1742,6 @@ void cvtMIMG(MCInst &Inst, const OperandVector &Operands, bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); - void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); - void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); bool parseDimId(unsigned &Encoding); @@ -3580,7 +3578,8 @@ return false; } -bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { +bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, + const SMLoc &IDLoc) { const unsigned Opc = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opc); @@ -3600,8 +3599,13 @@ assert(SrsrcIdx != -1); assert(SrsrcIdx > VAddr0Idx); - if (DimIdx == -1) - return true; // intersect_ray + bool IsA16 = Inst.getOperand(A16Idx).getImm(); + if (BaseOpcode->BVH) { + if (IsA16 == BaseOpcode->A16) + return true; + Error(IDLoc, "image address size does not match a16"); + return false; + } unsigned Dim = Inst.getOperand(DimIdx).getImm(); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); @@ -3609,7 +3613,6 @@ unsigned ActualAddrSize = IsNSA ? SrsrcIdx - VAddr0Idx : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; - bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); unsigned ExpectedAddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); @@ -3620,7 +3623,7 @@ unsigned VAddrLastSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; - return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize; + ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; } } else { if (ExpectedAddrSize > 12) @@ -3633,7 +3636,11 @@ return true; } - return ActualAddrSize == ExpectedAddrSize; + if (ActualAddrSize == ExpectedAddrSize) + return true; + + Error(IDLoc, "image address size does not match dim and a16"); + return false; } bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { @@ -4569,11 +4576,8 @@ if (!validateMIMGDataSize(Inst, IDLoc)) { return false; } - if (!validateMIMGAddrSize(Inst)) { - Error(IDLoc, - "image address size does not match dim and a16"); + if (!validateMIMGAddrSize(Inst, IDLoc)) return false; - } if (!validateMIMGAtomicDMask(Inst)) { Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), "invalid atomic image dmask"); @@ -7760,17 +7764,6 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); } -void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, - const OperandVector &Operands) { - for (unsigned I = 1; I < Operands.size(); ++I) { - auto &Operand = (AMDGPUOperand &)*Operands[I]; - if (Operand.isReg()) - Operand.addRegOperands(Inst, 1); - } - - Inst.addOperand(MCOperand::createImm(1)); // a16 -} - //===----------------------------------------------------------------------===// // smrd //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -865,8 +865,7 @@ assert(VDataIdx != -1); if (BaseOpcode->BVH) { // Add A16 operand for intersect_ray instructions - if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16)) - addOperand(MI, MCOperand::createImm(1)); + addOperand(MI, MCOperand::createImm(BaseOpcode->A16)); return MCDisassembler::Success; } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -48,6 +48,7 @@ bit IsAtomicRet = 0; bit MSAA = 0; bit BVH = 0; + bit A16 = 0; } def MIMGBaseOpcode : GenericEnum { @@ -59,7 +60,7 @@ let CppTypeName = "MIMGBaseOpcodeInfo"; let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates", - "LodOrClampOrMip", "HasD16", "MSAA", "BVH"]; + "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; let PrimaryKey = ["BaseOpcode"]; @@ -1191,50 +1192,43 @@ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); } -class MIMG_IntersectRay_gfx10 +class MIMG_IntersectRay_gfx10 : MIMG_gfx10 { - - let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), - !if(IsA16, (ins A16:$a16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", ""); + let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16"; let nsa = 0; } -class MIMG_IntersectRay_nsa_gfx10 +class MIMG_IntersectRay_nsa_gfx10 : MIMG_nsa_gfx10 { - let InOperandList = !con(nsah.AddrIns, - (ins SReg_128:$srsrc), - !if(IsA16, (ins A16:$a16), (ins))); - let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", ""); + let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16)); + let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16"; } -class MIMG_IntersectRay_gfx11 +class MIMG_IntersectRay_gfx11 : MIMG_gfx11 { - - let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc), - !if(IsA16, (ins A16:$a16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", ""); + let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16"; let nsa = 0; } class MIMG_IntersectRay_nsa_gfx11 addr_types> + list addr_types> : MIMG_nsa_gfx11 { - let InOperandList = !con(nsah.AddrIns, - (ins SReg_128:$srsrc), - !if(IsA16, (ins A16:$a16), (ins))); - let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", ""); + let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16)); + let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16"; } multiclass MIMG_IntersectRay { defvar info = MIMG_IntersectRay_Helper; def "" : MIMGBaseOpcode { let BVH = 1; + let A16 = IsA16; } - let AsmMatchConverter = !if(IsA16, "cvtIntersectRay", ""), + let AsmMatchConverter = "", dmask = 0xf, unorm = 1, d16 = 0, @@ -1248,17 +1242,17 @@ d16 = 0, BaseOpcode = !cast(NAME), VDataDwords = 4 in { - def _sa_gfx10 : MIMG_IntersectRay_gfx10 { + def _sa_gfx10 : MIMG_IntersectRay_gfx10 { let VAddrDwords = info.VAddrDwords; } - def _sa_gfx11 : MIMG_IntersectRay_gfx11 { + def _sa_gfx11 : MIMG_IntersectRay_gfx11 { let VAddrDwords = info.VAddrDwords; } - def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10 { + def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10 { let VAddrDwords = info.num_addrs; } def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11 { let VAddrDwords = info.num_addrs; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8044,8 +8044,7 @@ } Ops.push_back(TDescr); - if (IsA16) - Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1)); + Ops.push_back(DAG.getTargetConstant(IsA16, DL, MVT::i1)); Ops.push_back(M->getChain()); auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -355,6 +355,7 @@ bool HasD16; bool MSAA; bool BVH; + bool A16; }; LLVM_READONLY diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir @@ -7,11 +7,11 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-inorder ; GCN: S_WAITCNT 0 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) S_ENDPGM 0 ... --- @@ -20,11 +20,11 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-vmem ; GCN: S_WAITCNT 0 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -34,11 +34,11 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-mimg-samp ; GCN: S_WAITCNT 0 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) S_ENDPGM 0 ... @@ -50,10 +50,10 @@ ; GCN: S_WAITCNT 0 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec ; GCN-NEXT: S_WAITCNT 16240 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) S_ENDPGM 0 ... --- @@ -64,9 +64,9 @@ ; GCN: S_WAITCNT 0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) ; GCN-NEXT: S_WAITCNT 16240 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) S_ENDPGM 0 ... diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -207,3 +207,9 @@ s_waitcnt_depctr depctr_sa_sdst(0) depctr_sa_sdst(0) // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: duplicate counter name depctr_sa_sdst + +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16 +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16 + +image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] noa16 +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16 diff --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s --- a/llvm/test/MC/AMDGPU/gfx1030_new.s +++ b/llvm/test/MC/AMDGPU/gfx1030_new.s @@ -87,10 +87,10 @@ image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] // GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00] -image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 -// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40] +image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] noa16 +// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00] -image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16 +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 // GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40] image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7] @@ -102,10 +102,10 @@ image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15] // GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00] -image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16 -// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00] +image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15] noa16 +// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00] -image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] noa16 +image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16 // GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00] image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s @@ -146,3 +146,9 @@ image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22]], s[12:15] a16 // NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16 + +image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] noa16 +// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s @@ -325,10 +325,10 @@ image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] // GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00] -image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 -// GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00] +image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] noa16 +// GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00] -image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16 +image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 // GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00] image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7] @@ -340,10 +340,10 @@ image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] // GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f] -image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 -// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00] +image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] noa16 +// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f] -image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] noa16 +image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 // GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00] image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15]