diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -657,7 +657,7 @@ bool isSendMsg() const; bool isSwizzle() const; bool isSMRDOffset8() const; - bool isSMRDOffset20() const; + bool isSMEMOffset() const; bool isSMRDLiteralOffset() const; bool isDPP8() const; bool isDPPCtrl() const; @@ -1326,9 +1326,11 @@ void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; + SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); + bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSOPLiteral(const MCInst &Inst) const; bool validateConstantBusLimitations(const MCInst &Inst); bool validateEarlyClobberLimitations(const MCInst &Inst); @@ -1405,7 +1407,7 @@ AMDGPUOperand::Ptr defaultSLC() const; AMDGPUOperand::Ptr defaultSMRDOffset8() const; - AMDGPUOperand::Ptr defaultSMRDOffset20() const; + AMDGPUOperand::Ptr defaultSMEMOffset() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; AMDGPUOperand::Ptr defaultFlatOffset() const; @@ -3395,6 +3397,46 @@ return true; } +SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (Op.isSMEMOffset()) + return Op.getStartLoc(); + } + return getLoc(); +} + +bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, + const OperandVector &Operands) { + if (isCI() || isSI()) + return true; + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & SIInstrFlags::SMRD) == 0) + return true; + + 
auto Opcode = Inst.getOpcode(); + auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); + if (OpNum == -1) + return true; + + const auto &Op = Inst.getOperand(OpNum); + if (!Op.isImm()) + return true; + + uint64_t Offset = Op.getImm(); + bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); + if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || + AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) + return true; + + Error(getSMEMOffsetLoc(Operands), + (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : + "expected a 21-bit signed offset"); + + return false; +} + bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { unsigned Opcode = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opcode); @@ -3572,6 +3614,9 @@ if (!validateFlatOffset(Inst, Operands)) { return false; } + if (!validateSMEMOffset(Inst, Operands)) { + return false; + } return true; } @@ -6071,8 +6116,8 @@ return isImm() && isUInt<8>(getImm()); } -bool AMDGPUOperand::isSMRDOffset20() const { - return isImm() && isUInt<20>(getImm()); +bool AMDGPUOperand::isSMEMOffset() const { + return isImm(); // Offset range is checked later by validator. } bool AMDGPUOperand::isSMRDLiteralOffset() const { @@ -6085,7 +6130,7 @@ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } @@ -7097,6 +7142,8 @@ return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; case MCK_AttrChan: return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; + case MCK_ImmSMEMOffset: + return Operand.isSMEMOffset() ? 
Match_Success : Match_InvalidOperand; case MCK_SReg_64: case MCK_SReg_64_XEXEC: // Null is defined as a 32-bit register but diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -100,6 +100,18 @@ return addOperand(Inst, MCOperand::createImm(Imm)); } +static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + auto DAsm = static_cast(Decoder); + int64_t Offset; + if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets. + Offset = Imm & 0xFFFFF; + } else { // GFX9+ supports 21-bit signed offsets. + Offset = SignExtend64<21>(Imm); + } + return addOperand(Inst, MCOperand::createImm(Offset)); +} + static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder) { auto DAsm = static_cast(Decoder); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -61,7 +61,7 @@ raw_ostream &O); void printSMRDOffset8(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printSMRDOffset20(const MCInst *MI, unsigned OpNo, + void printSMEMOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -180,10 +180,10 @@ printU32ImmOperand(MI, OpNo, STI, O); } -void 
AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - printU32ImmOperand(MI, OpNo, STI, O); + O << formatHex(MI->getOperand(OpNo).getImm()); } void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -51,6 +51,12 @@ return 0; } + virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -70,6 +70,10 @@ SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; + unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; @@ -358,6 +362,15 @@ return getMachineOpValue(MI, MO, Fixups, STI); } +unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + auto Offset = MI.getOperand(OpNo).getImm(); + // VI only supports 20-bit unsigned offsets. 
+ assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset)); + return Offset; +} + unsigned SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -11,9 +11,11 @@ let OperandType = "OPERAND_IMMEDIATE"; } -def smrd_offset_20 : NamedOperandU32<"SMRDOffset20", - NamedMatchClass<"SMRDOffset20">> { +def smem_offset : NamedOperandU32<"SMEMOffset", + NamedMatchClass<"SMEMOffset">> { let OperandType = "OPERAND_IMMEDIATE"; + let EncoderMethod = "getSMEMOffsetEncoding"; + let DecoderMethod = "decodeSMEMOffset"; } //===----------------------------------------------------------------------===// @@ -43,6 +45,7 @@ bit has_dlc = 0; bits<1> has_offset = 1; bits<1> offset_is_imm = 0; + bit is_buffer = 0; } class SM_Real @@ -51,9 +54,15 @@ let isPseudo = 0; let isCodeGenOnly = 0; + Instruction Opcode = !cast(NAME); + // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let SMRD = ps.SMRD; + + bit is_buffer = ps.is_buffer; // encoding bits<7> sbase; @@ -153,7 +162,7 @@ } multiclass SM_Pseudo_Discards { - def _IMM : SM_Discard_Pseudo ; + def _IMM : SM_Discard_Pseudo ; def _SGPR : SM_Discard_Pseudo ; } @@ -185,7 +194,7 @@ } multiclass SM_Pseudo_Probe { - def _IMM : SM_Probe_Pseudo ; + def _IMM : SM_Probe_Pseudo ; def _SGPR : SM_Probe_Pseudo ; } @@ -228,7 +237,7 @@ SM_Atomic_Pseudo { @@ -266,6 +275,7 @@ defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>; defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>; +let is_buffer = 1 in { defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads < "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC >; @@ -287,12 +297,14 @@ defm S_BUFFER_LOAD_DWORDX16 : 
SM_Pseudo_Loads < "s_buffer_load_dwordx16", SReg_128, SReg_512 >; +} let SubtargetPredicate = HasScalarStores in { defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>; +let is_buffer = 1 in { defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores < "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC >; @@ -304,6 +316,7 @@ defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores < "s_buffer_store_dwordx4", SReg_128, SReg_128 >; +} } // End SubtargetPredicate = HasScalarStores def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>; @@ -321,7 +334,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>; defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>; +let is_buffer = 1 in { defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>; +} } // SubtargetPredicate = isGFX8Plus let SubtargetPredicate = isGFX10Plus in { @@ -341,6 +356,7 @@ let SubtargetPredicate = HasScalarAtomics in { +let is_buffer = 1 in { defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>; defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>; defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>; @@ -368,6 +384,7 @@ defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>; defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>; defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>; +} defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>; defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>; @@ -481,14 +498,17 @@ let Inst{17} = 
imm; let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding - let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?); + + // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed. + // Offset value is corrected accordingly when offset is encoded/decoded. + let Inst{52-32} = !if(ps.has_offset, offset{20-0}, ?); } multiclass SM_Real_Loads_vi op, string ps, SM_Load_Pseudo immPs = !cast(ps#_IMM), SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { def _IMM_vi : SMEM_Real_vi { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_vi : SMEM_Real_vi { let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); @@ -509,7 +529,7 @@ // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _IMM_vi : SMEM_Real_Store_vi { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_vi : SMEM_Real_Store_vi { @@ -665,12 +685,10 @@ let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc); let LGKM_CNT = ps.LGKM_CNT; - let SMRD = ps.SMRD; let mayLoad = ps.mayLoad; let mayStore = ps.mayStore; let hasSideEffects = ps.hasSideEffects; let SchedRW = ps.SchedRW; - let UseNamedOperandTable = ps.UseNamedOperandTable; let Inst{7-0} = 0xff; let Inst{8} = 0; @@ -849,7 +867,7 @@ let Inst{16} = !if(ps.has_glc, glc, ?); let Inst{25-18} = op; let Inst{31-26} = 0x3d; - let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?); + let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?); let Inst{63-57} = !if(ps.offset_is_imm, !cast(SGPR_NULL.HWEncoding), !if(ps.has_offset, offset{6-0}, ?)); } @@ -858,7 +876,7 @@ 
SM_Load_Pseudo immPs = !cast(ps#_IMM), SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { def _IMM_gfx10 : SMEM_Real_gfx10 { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_gfx10 : SMEM_Real_gfx10 { let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); @@ -878,7 +896,7 @@ // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _IMM_gfx10 : SMEM_Real_Store_gfx10 { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_gfx10 : SMEM_Real_Store_gfx10 { @@ -1025,3 +1043,12 @@ defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">; } // End SubtargetPredicate = HasScalarAtomics + +def SMInfoTable : GenericTable { + let FilterClass = "SM_Real"; + let CppTypeName = "SMInfo"; + let Fields = ["Opcode", "is_buffer"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getSMEMOpcodeHelper"; +} diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -294,6 +294,9 @@ bool getMUBUFHasSoffset(unsigned Opc); LLVM_READONLY +bool getSMEMIsBuffer(unsigned Opc); + +LLVM_READONLY const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, @@ -635,6 +638,15 @@ bool isArgPassedInSGPR(const Argument *Arg); +LLVM_READONLY +bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, + int64_t EncodedOffset); + +LLVM_READONLY +bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, + int64_t EncodedOffset, + bool IsBuffer); + /// Convert \p 
ByteOffset to dwords if the subtarget uses dword SMRD immediate /// offsets. uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -148,10 +148,17 @@ bool has_soffset; }; +struct SMInfo { + uint16_t Opcode; + bool IsBuffer; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL #define GET_MUBUFInfoTable_IMPL +#define GET_SMInfoTable_DECL +#define GET_SMInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMTBUFBaseOpcode(unsigned Opc) { @@ -214,6 +221,11 @@ return Info ? Info->has_soffset : false; } +bool getSMEMIsBuffer(unsigned Opc) { + const SMInfo *Info = getSMEMOpcodeHelper(Opc); + return Info ? Info->IsBuffer : false; +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. @@ -1268,12 +1280,20 @@ return isGFX9(ST) || isGFX10(ST); } -static bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, - int64_t EncodedOffset) { +bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, + int64_t EncodedOffset) { return hasSMEMByteOffset(ST) ? 
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } +bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, + int64_t EncodedOffset, + bool IsBuffer) { + return !IsBuffer && + hasSMRDSignedImmOffset(ST) && + isInt<21>(EncodedOffset); +} + static bool isDwordAligned(uint64_t ByteOffset) { return (ByteOffset & 3) == 0; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll @@ -93,7 +93,7 @@ ; GCN-LABEL: {{^}}smrd6: ; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 ; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 -; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xfffffffc +; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], -0x4 define amdgpu_kernel void @smrd6(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { entry: %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -1 diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -92,7 +92,7 @@ ; GCN-LABEL: {{^}}smrd6: ; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 ; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 -; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xfffffffc +; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, -0x4 define amdgpu_kernel void @smrd6(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { entry: %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -1 diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -608,3 +608,155 @@ // GFX9: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xf5,0x00,0x00,0x00,0x00] // NOSICIVI: error: + 
+//===----------------------------------------------------------------------===// +// Unsigned 20-bit offsets (VI+) +//===----------------------------------------------------------------------===// + +s_atc_probe 0x7, s[4:5], 0xFFFFF +// NOSICI: error: instruction not supported on this GPU +// GFX89: s_atc_probe 7, s[4:5], 0xfffff ; encoding: [0xc2,0x01,0x9a,0xc0,0xff,0xff,0x0f,0x00] +// GFX10: s_atc_probe 7, s[4:5], 0xfffff ; encoding: [0xc2,0x01,0x98,0xf4,0xff,0xff,0x0f,0xfa] + +s_atc_probe_buffer 0x1, s[8:11], 0xFFFFF +// NOSICI: error: instruction not supported on this GPU +// GFX89: s_atc_probe_buffer 1, s[8:11], 0xfffff ; encoding: [0x44,0x00,0x9e,0xc0,0xff,0xff,0x0f,0x00] +// GFX10: s_atc_probe_buffer 1, s[8:11], 0xfffff ; encoding: [0x44,0x00,0x9c,0xf4,0xff,0xff,0x0f,0xfa] + +s_store_dword s1, s[2:3], 0xFFFFF +// NOSICI: error: instruction not supported on this GPU +// GFX89: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00] +// GFX10: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x40,0xf4,0xff,0xff,0x0f,0xfa] + +s_buffer_store_dword s10, s[92:95], 0xFFFFF +// NOSICI: error: instruction not supported on this GPU +// GFX89: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00] +// GFX10: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x60,0xf4,0xff,0xff,0x0f,0xfa] + +s_atomic_swap s5, s[2:3], 0xFFFFF +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_atomic_swap s5, s[2:3], 0xfffff ; encoding: [0x41,0x01,0x00,0xf6,0xff,0xff,0x0f,0xfa] +// GFX9: s_atomic_swap s5, s[2:3], 0xfffff ; encoding: [0x41,0x01,0x02,0xc2,0xff,0xff,0x0f,0x00] + +s_buffer_atomic_swap s5, s[4:7], 0xFFFFF +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_buffer_atomic_swap s5, s[4:7], 0xfffff ; encoding: [0x42,0x01,0x00,0xf5,0xff,0xff,0x0f,0xfa] +// GFX9: s_buffer_atomic_swap s5, s[4:7], 0xfffff ; encoding: 
[0x42,0x01,0x02,0xc1,0xff,0xff,0x0f,0x00] + +s_atc_probe 0x7, s[4:5], 0x1FFFFF +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 21-bit signed offset +// NOVI: error: expected a 20-bit unsigned offset + +s_atc_probe_buffer 0x1, s[8:11], 0x1FFFFF +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset +// NOVI: error: expected a 20-bit unsigned offset + +s_store_dword s1, s[2:3], 0x1FFFFF +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 21-bit signed offset +// NOVI: error: expected a 20-bit unsigned offset + +s_buffer_store_dword s10, s[92:95], 0x1FFFFF +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset +// NOVI: error: expected a 20-bit unsigned offset + +s_atomic_swap s5, s[2:3], 0x1FFFFF +// NOSICIVI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 21-bit signed offset + +s_buffer_atomic_swap s5, s[4:7], 0x1FFFFF +// NOSICIVI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset + +//===----------------------------------------------------------------------===// +// Signed offsets (gfx9+) +//===----------------------------------------------------------------------===// + +s_atc_probe 0x7, s[4:5], -1 +// NOVI: error: expected a 20-bit unsigned offset +// GFX9: s_atc_probe 7, s[4:5], -0x1 ; encoding: [0xc2,0x01,0x9a,0xc0,0xff,0xff,0x1f,0x00] +// GFX10: s_atc_probe 7, s[4:5], -0x1 ; encoding: [0xc2,0x01,0x98,0xf4,0xff,0xff,0x1f,0xfa] +// NOSICI: error: instruction not supported on this GPU + +s_atc_probe_buffer 0x1, s[8:11], -1 +// NOVI: error: expected a 20-bit unsigned offset +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset + +s_store_dword s1, s[2:3], -1 +// NOVI: error: expected a 20-bit unsigned offset +// GFX9: s_store_dword s1, s[2:3], -0x1 ; 
encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x1f,0x00] +// GFX10: s_store_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x40,0xf4,0xff,0xff,0x1f,0xfa] +// NOSICI: error: instruction not supported on this GPU + +s_buffer_store_dword s10, s[92:95], -1 +// NOVI: error: expected a 20-bit unsigned offset +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset + +s_load_dword s1, s[2:3], -1 +// NOVI: error: expected a 20-bit unsigned offset +// GFX9: s_load_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x02,0xc0,0xff,0xff,0x1f,0x00] +// GFX10: s_load_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x00,0xf4,0xff,0xff,0x1f,0xfa] +// NOSICI: error: instruction not supported on this GPU + +s_buffer_load_dword s10, s[92:95], -1 +// NOVI: error: expected a 20-bit unsigned offset +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset + +s_atomic_swap s5, s[2:3], -1 +// NOVI: error: instruction not supported on this GPU +// GFX9: s_atomic_swap s5, s[2:3], -0x1 ; encoding: [0x41,0x01,0x02,0xc2,0xff,0xff,0x1f,0x00] +// GFX10: s_atomic_swap s5, s[2:3], -0x1 ; encoding: [0x41,0x01,0x00,0xf6,0xff,0xff,0x1f,0xfa] +// NOSICI: error: instruction not supported on this GPU + +s_buffer_atomic_swap s5, s[4:7], -1 +// NOVI: error: instruction not supported on this GPU +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset + +s_atc_probe 0x7, s[4:5], 0xFFFFFFFFFFF00000 +// NOSICI: error: instruction not supported on this GPU +// GFX10: s_atc_probe 7, s[4:5], -0x100000 ; encoding: [0xc2,0x01,0x98,0xf4,0x00,0x00,0x10,0xfa] +// GFX9: s_atc_probe 7, s[4:5], -0x100000 ; encoding: [0xc2,0x01,0x9a,0xc0,0x00,0x00,0x10,0x00] +// NOVI: error: expected a 20-bit unsigned offset + +s_atc_probe_buffer 0x1, s[8:11], 0xFFFFFFFFFFF00000 +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset +// 
NOVI: error: expected a 20-bit unsigned offset + +s_store_dword s1, s[2:3], 0xFFFFFFFFFFF00000 +// NOSICI: error: instruction not supported on this GPU +// GFX10: s_store_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x40,0xf4,0x00,0x00,0x10,0xfa] +// GFX9: s_store_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x42,0xc0,0x00,0x00,0x10,0x00] +// NOVI: error: expected a 20-bit unsigned offset + +s_buffer_store_dword s10, s[92:95], 0xFFFFFFFFFFF00000 +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset +// NOVI: error: expected a 20-bit unsigned offset + +s_load_dword s1, s[2:3], 0xFFFFFFFFFFF00000 +// NOSICI: error: instruction not supported on this GPU +// GFX10: s_load_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x00,0xf4,0x00,0x00,0x10,0xfa] +// GFX9: s_load_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x02,0xc0,0x00,0x00,0x10,0x00] +// NOVI: error: expected a 20-bit unsigned offset + +s_buffer_load_dword s10, s[92:95], 0xFFFFFFFFFFF00000 +// NOSICI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset +// NOVI: error: expected a 20-bit unsigned offset + +s_atomic_swap s5, s[2:3], 0xFFFFFFFFFFF00000 +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_atomic_swap s5, s[2:3], -0x100000 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x10,0xfa] +// GFX9: s_atomic_swap s5, s[2:3], -0x100000 ; encoding: [0x41,0x01,0x02,0xc2,0x00,0x00,0x10,0x00] + +s_buffer_atomic_swap s5, s[4:7], 0xFFFFFFFFFFF00000 +// NOSICIVI: error: instruction not supported on this GPU +// NOGFX9: error: expected a 20-bit unsigned offset diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -32,12 +32,12 @@ s_load_dword s1, s[2:3], 0x100000 // NOSI: error: instruction not supported on this GPU // CI: s_load_dword s1, s[2:3], 0x100000 ; encoding: 
[0xff,0x82,0x00,0xc0,0x00,0x00,0x10,0x00] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: expected a 20-bit unsigned offset s_load_dword s1, s[2:3], 0xffffffff // NOSI: error: instruction not supported on this GPU // CI: s_load_dword s1, s[2:3], 0xffffffff ; encoding: [0xff,0x82,0x00,0xc0,0xff,0xff,0xff,0xff] -// NOVI: error: instruction not supported on this GPU +// NOVI: error: expected a 20-bit unsigned offset //===----------------------------------------------------------------------===// // Instructions diff --git a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt --- a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt @@ -234,3 +234,60 @@ # GFX9: s_buffer_atomic_xor_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0xa8,0xc1,0x00,0x00,0x00,0x00] 0x02,0x19,0xa8,0xc1,0x00,0x00,0x00,0x00 + +#===------------------------------------------------------------------------===# +# Unsigned 20-bit offsets +#===------------------------------------------------------------------------===# + +# GFX9: s_atc_probe 7, s[4:5], 0xfffff ; encoding: [0xc2,0x01,0x9a,0xc0,0xff,0xff,0x0f,0x00] +0xc2,0x01,0x9a,0xc0,0xff,0xff,0x0f,0x00 + +# GFX9: s_atc_probe_buffer 1, s[8:11], 0xfffff ; encoding: [0x44,0x00,0x9e,0xc0,0xff,0xff,0x0f,0x00] +0x44,0x00,0x9e,0xc0,0xff,0xff,0x0f,0x00 + +# GFX9: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00] +0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00 + +# GFX9: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00] +0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00 + +# GFX9: s_atomic_swap s5, s[2:3], 0xfffff ; encoding: [0x41,0x01,0x02,0xc2,0xff,0xff,0x0f,0x00] +0x41,0x01,0x02,0xc2,0xff,0xff,0x0f,0x00 + +# GFX9: s_buffer_atomic_swap s5, s[4:7], 0xfffff ; encoding: [0x42,0x01,0x02,0xc1,0xff,0xff,0x0f,0x00] +0x42,0x01,0x02,0xc1,0xff,0xff,0x0f,0x00 + 
+#===------------------------------------------------------------------------===# +# Signed 21-bit offsets +#===------------------------------------------------------------------------===# + +# GFX9: s_atc_probe 7, s[4:5], -0x1 ; encoding: [0xc2,0x01,0x9a,0xc0,0xff,0xff,0x1f,0x00] +0xc2,0x01,0x9a,0xc0,0xff,0xff,0x1f,0x00 + +# GFX9: s_store_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x1f,0x00] +0x41,0x00,0x42,0xc0,0xff,0xff,0x1f,0x00 + +# GFX9: s_load_dword s1, s[2:3], -0x1 ; encoding: [0x41,0x00,0x02,0xc0,0xff,0xff,0x1f,0x00] +0x41,0x00,0x02,0xc0,0xff,0xff,0x1f,0x00 + +# GFX9: s_atomic_swap s5, s[2:3], -0x1 ; encoding: [0x41,0x01,0x02,0xc2,0xff,0xff,0x1f,0x00] +0x41,0x01,0x02,0xc2,0xff,0xff,0x1f,0x00 + +# GFX9: s_atc_probe 7, s[4:5], -0x100000 ; encoding: [0xc2,0x01,0x9a,0xc0,0x00,0x00,0x10,0x00] +0xc2,0x01,0x9a,0xc0,0x00,0x00,0x10,0x00 + +# GFX9: s_store_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x42,0xc0,0x00,0x00,0x10,0x00] +0x41,0x00,0x42,0xc0,0x00,0x00,0x10,0x00 + +# GFX9: s_load_dword s1, s[2:3], -0x100000 ; encoding: [0x41,0x00,0x02,0xc0,0x00,0x00,0x10,0x00] +0x41,0x00,0x02,0xc0,0x00,0x00,0x10,0x00 + +# GFX9: s_atomic_swap s5, s[2:3], -0x100000 ; encoding: [0x41,0x01,0x02,0xc2,0x00,0x00,0x10,0x00] +0x41,0x01,0x02,0xc2,0x00,0x00,0x10,0x00 + +#===------------------------------------------------------------------------===# +# Decoding of invalid buffer offsets +#===------------------------------------------------------------------------===# + +# GFX9: s_atc_probe_buffer 1, s[8:11], -0x1 ; encoding: [0x44,0x00,0x9e,0xc0,0xff,0xff,0x1f,0x00] +0x44,0x00,0x9e,0xc0,0xff,0xff,0x1f,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/smem_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/smem_vi.txt --- a/llvm/test/MC/Disassembler/AMDGPU/smem_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/smem_vi.txt @@ -56,3 +56,32 @@ # VI: s_atc_probe_buffer 0, s[8:11], s101 ; encoding: [0x04,0x00,0x9c,0xc0,0x65,0x00,0x00,0x00] 
0x04,0x00,0x9c,0xc0,0x65,0x00,0x00,0x00 + +#===------------------------------------------------------------------------===# +# Unsigned 20-bit offsets +#===------------------------------------------------------------------------===# + +# VI: s_atc_probe 7, s[4:5], 0xfffff ; encoding: [0xc2,0x01,0x9a,0xc0,0xff,0xff,0x0f,0x00] +0xc2,0x01,0x9a,0xc0,0xff,0xff,0x0f,0x00 + +# VI: s_atc_probe_buffer 1, s[8:11], 0xfffff ; encoding: [0x44,0x00,0x9e,0xc0,0xff,0xff,0x0f,0x00] +0x44,0x00,0x9e,0xc0,0xff,0xff,0x0f,0x00 + +# VI: s_load_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x02,0xc0,0xff,0xff,0x0f,0x00] +0x41,0x00,0x02,0xc0,0xff,0xff,0x0f,0x00 + +# VI: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00] +0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00 + +# VI: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00] +0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00 + +#===------------------------------------------------------------------------===# +# Decoding of invalid (21-bit) offsets +#===------------------------------------------------------------------------===# + +# VI: s_store_dword s1, s[2:3], 0xfffff ; encoding: [0x41,0x00,0x42,0xc0,0xff,0xff,0x0f,0x00] +0x41,0x00,0x42,0xc0,0xff,0xff,0x1f,0x00 + +# VI: s_buffer_store_dword s10, s[92:95], 0xfffff ; encoding: [0xae,0x02,0x62,0xc0,0xff,0xff,0x0f,0x00] +0xae,0x02,0x62,0xc0,0xff,0xff,0x1f,0x00