diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1624,6 +1624,8 @@ bool validateDivScale(const MCInst &Inst); bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc); + bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, + const SMLoc &IDLoc); Optional validateLdsDirect(const MCInst &Inst); unsigned getConstantBusLimit(unsigned Opcode) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); @@ -4417,6 +4419,31 @@ return true; } +bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, + const OperandVector &Operands, + const SMLoc &IDLoc) { + if (isGFX940()) + return true; + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != + (SIInstrFlags::VALU | SIInstrFlags::FLAT)) + return true; + // This is FLAT LDS DMA. + + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); + StringRef CStr(S.getPointer()); + if (!CStr.startswith("lds")) { + // This is an incorrectly selected LDS DMA version of a FLAT load opcode. + // The LDS version should have the 'lds' modifier, but it follows optional + // operands so its absence is ignored by the matcher. 
+ Error(IDLoc, "invalid operands for instruction"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands) { @@ -4532,6 +4559,10 @@ return false; } + if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { + return false; + } + return true; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -58,6 +58,7 @@ bits<1> has_sccb = 1; bits<1> sccbValue = 0; bits<1> has_sve = 0; // Scratch VGPR Enable + bits<1> lds = 0; bits<1> sve = 0; let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, @@ -110,7 +111,7 @@ bits<5> cpol; // Only valid on gfx9 - bits<1> lds = 0; // XXX - What does this actually do? + bits<1> lds = ps.lds; // LDS DMA for global and scratch // Segment, 00=flat, 01=scratch, 10=global, 11=reserved bits<2> seg = !if(ps.is_flat_global, 0b10, @@ -253,6 +254,7 @@ " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { let LGKM_CNT = 1; let is_flat_global = 1; + let lds = 1; let has_data = 0; let has_vdst = 0; let mayLoad = 1; @@ -411,6 +413,7 @@ let LGKM_CNT = 1; let is_flat_scratch = 1; + let lds = 1; let has_data = 0; let has_vdst = 0; let mayLoad = 1; @@ -833,15 +836,12 @@ defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", VGPR_32, i32>; -let SubtargetPredicate = isGFX940Plus in { - defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; -} // End let SubtargetPredicate = isGFX940Plus } // End is_flat_global = 
1 @@ -873,16 +873,12 @@ defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; -let SubtargetPredicate = isGFX940Plus in { - defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; -} // End let SubtargetPredicate = isGFX940Plus - } // End SubtargetPredicate = HasFlatScratchInsts let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { @@ -1655,6 +1651,33 @@ } } +multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx940_op, + string pre_gfx940_name = !subst("_lds", "", !cast(NAME).PseudoInstr), + bit has_sccb = !cast(NAME).has_sccb> { + + let OtherPredicates = [isGFX8GFX9NotGFX940] in { + def _vi : FLAT_Real_vi(NAME), has_sccb> { + let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds"; + } + def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { + let AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; + } + } + + let SubtargetPredicate = isGFX940Plus in { + def _gfx940 : FLAT_Real_gfx940(NAME)>; + def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; + } +} + +multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx940_op> { + defm "" : FLAT_Real_AllAddr_LDS; + let SubtargetPredicate = isGFX940Plus in { + def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; + def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; + } +} + def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; @@ 
-1746,13 +1769,11 @@ defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; -let AssemblerPredicate = isGFX940Plus in { -defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_vi <0x026>; -defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_vi <0x027>; -defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_vi <0x028>; -defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_vi <0x029>; -defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_vi <0x02a>; -} // End let AssemblerPredicate = isGFX940Plus +defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>; +defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>; +defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; +defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; +defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; @@ -1781,13 +1802,11 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; -let AssemblerPredicate = isGFX940Plus in { -defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x026>; -defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x027>; -defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_vi <0x028>; -defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x029>; -defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_vi <0x02a>; -} // End let AssemblerPredicate = isGFX940Plus +defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>; +defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>; +defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>; +defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>; +defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>; defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>; defm 
SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>; @@ -1926,6 +1945,23 @@ FLAT_Real_SADDR_gfx10, FLAT_Real_ST_gfx10; +multiclass FLAT_Real_AllAddr_LDS_gfx10 op, + string opname = !subst("_lds", "", !cast(NAME).PseudoInstr)> { + let AsmString = opname # !cast(NAME).AsmOperands # " lds" in + defm "" : FLAT_Real_Base_gfx10; + + let AsmString = opname # !cast(NAME#"_SADDR").AsmOperands # " lds" in + defm "" : FLAT_Real_SADDR_gfx10; +} + +multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10 op, + string opname = !subst("_lds", "", !cast(NAME).PseudoInstr)> { + defm "" : FLAT_Real_AllAddr_LDS_gfx10; + + let AsmString = opname # !cast(NAME#"_ST").AsmOperands # " lds" in + defm "" : FLAT_Real_ST_gfx10; +} + // ENC_FLAT. defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; @@ -2042,6 +2078,12 @@ defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; +defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>; +defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>; +defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>; +defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>; +defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>; + // ENC_FLAT_SCRATCH. 
defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; @@ -2065,3 +2107,9 @@ defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; + +defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>; +defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; +defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; +defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; +defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; diff --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s --- a/llvm/test/MC/AMDGPU/gfx1030_new.s +++ b/llvm/test/MC/AMDGPU/gfx1030_new.s @@ -204,3 +204,18 @@ s_waitcnt_depctr depctr_hold_cnt(1), depctr_sa_sdst(1), depctr_va_vdst(14), depctr_va_sdst(6), depctr_va_ssrc(1), depctr_va_vcc(1), depctr_vm_vsrc(6) // GFX10: encoding: [0x9b,0xed,0xa3,0xbf] + +scratch_load_dword off, off offset:1024 lds +// GFX10: [0x00,0x64,0x30,0xdc,0x00,0x00,0x7f,0x00] + +scratch_load_ubyte off, off offset:1024 lds +// GFX10: [0x00,0x64,0x20,0xdc,0x00,0x00,0x7f,0x00] + +scratch_load_sbyte off, off offset:1024 lds +// GFX10: [0x00,0x64,0x24,0xdc,0x00,0x00,0x7f,0x00] + +scratch_load_ushort off, off offset:1024 lds +// GFX10: [0x00,0x64,0x28,0xdc,0x00,0x00,0x7f,0x00] + +scratch_load_sshort off, off offset:1024 lds +// GFX10: [0x00,0x64,0x2c,0xdc,0x00,0x00,0x7f,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s @@ -359,6 +359,69 @@ global_atomic_xor_x2 v[1:2], v[2:3], off dlc // GFX10: [0x00,0x90,0x6c,0xdd,0x01,0x02,0x7d,0x00] +global_load_dword v2, s[4:5] offset:1024 lds +// GFX10: 
[0x00,0xa4,0x30,0xdc,0x02,0x00,0x04,0x00] + +global_load_dword v2, s[4:5] offset:1024 lds +// GFX10: [0x00,0xa4,0x30,0xdc,0x02,0x00,0x04,0x00] + +global_load_ubyte v2, s[4:5] offset:1024 lds +// GFX10: [0x00,0xa4,0x20,0xdc,0x02,0x00,0x04,0x00] + +global_load_sbyte v2, s[4:5] offset:1024 lds +// GFX10: [0x00,0xa4,0x24,0xdc,0x02,0x00,0x04,0x00] + +global_load_ushort v2, s[4:5] offset:1024 lds +// GFX10: [0x00,0xa4,0x28,0xdc,0x02,0x00,0x04,0x00] + +global_load_sshort v2, s[4:5] offset:1024 lds +// GFX10: [0x00,0xa4,0x2c,0xdc,0x02,0x00,0x04,0x00] + +global_load_dword v[2:3], off offset:1024 lds +// GFX10: [0x00,0xa4,0x30,0xdc,0x02,0x00,0x7d,0x00] + +global_load_ubyte v[2:3], off offset:1024 lds +// GFX10: [0x00,0xa4,0x20,0xdc,0x02,0x00,0x7d,0x00] + +global_load_sbyte v[2:3], off offset:1024 lds +// GFX10: [0x00,0xa4,0x24,0xdc,0x02,0x00,0x7d,0x00] + +global_load_ushort v[2:3], off offset:1024 lds +// GFX10: [0x00,0xa4,0x28,0xdc,0x02,0x00,0x7d,0x00] + +global_load_sshort v[2:3], off offset:1024 lds +// GFX10: [0x00,0xa4,0x2c,0xdc,0x02,0x00,0x7d,0x00] + +scratch_load_dword v2, off offset:1024 lds +// GFX10: [0x00,0x64,0x30,0xdc,0x02,0x00,0x7d,0x00] + +scratch_load_ubyte v2, off offset:1024 lds +// GFX10: [0x00,0x64,0x20,0xdc,0x02,0x00,0x7d,0x00] + +scratch_load_sbyte v2, off offset:1024 lds +// GFX10: [0x00,0x64,0x24,0xdc,0x02,0x00,0x7d,0x00] + +scratch_load_ushort v2, off offset:1024 lds +// GFX10: [0x00,0x64,0x28,0xdc,0x02,0x00,0x7d,0x00] + +scratch_load_sshort v2, off offset:1024 lds +// GFX10: [0x00,0x64,0x2c,0xdc,0x02,0x00,0x7d,0x00] + +scratch_load_dword off, s4 offset:1024 lds +// GFX10: [0x00,0x64,0x30,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_ubyte off, s4 offset:1024 lds +// GFX10: [0x00,0x64,0x20,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_sbyte off, s4 offset:1024 lds +// GFX10: [0x00,0x64,0x24,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_ushort off, s4 offset:1024 lds +// GFX10: [0x00,0x64,0x28,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_sshort off, s4 offset:1024 
lds +// GFX10: [0x00,0x64,0x2c,0xdc,0x00,0x00,0x04,0x00] + //===----------------------------------------------------------------------===// // Also see flat-gfx10.s, flat-global.s, flat-scratch-instructions.s. //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/gfx8_unsupported.s b/llvm/test/MC/AMDGPU/gfx8_unsupported.s --- a/llvm/test/MC/AMDGPU/gfx8_unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx8_unsupported.s @@ -1828,6 +1828,18 @@ v_xor3_b32 v255, v1, v2, v3 // CHECK: error: instruction not supported on this GPU +global_load_lds_dword v[2:3], off +// CHECK: error: instruction not supported on this GPU + +global_load_dword v[2:3], off lds +// CHECK: error: instruction not supported on this GPU + +scratch_load_dword v2, off lds +// CHECK: error: instruction not supported on this GPU + +scratch_load_dword off, s2 lds +// CHECK: error: instruction not supported on this GPU + //===----------------------------------------------------------------------===// // Unsupported e32 variants. //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/gfx9-asm-err.s b/llvm/test/MC/AMDGPU/gfx9-asm-err.s --- a/llvm/test/MC/AMDGPU/gfx9-asm-err.s +++ b/llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -32,3 +32,12 @@ v_cvt_u32_f64 v5, v[0:1] quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf // GFX9ERR: error: not a valid operand. 
+ +global_load_lds_dword v[2:3], off +// GFX9ERR: error: instruction not supported on this GPU + +global_load_dword v[2:3], off +// GFX9ERR: error: invalid operands for instruction + +scratch_load_dword v2, off, offset:256 +// GFX9ERR: error: invalid operands for instruction diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -284,3 +284,9 @@ exp pos0 v3, v2, v1, v0 // GFX90A: error: instruction not supported on this GPU + +global_load_lds_dword v[2:3], off +// GFX90A: error: instruction not supported on this GPU + +scratch_load_lds_dword v2, off +// GFX90A: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s --- a/llvm/test/MC/AMDGPU/gfx940_err.s +++ b/llvm/test/MC/AMDGPU/gfx940_err.s @@ -84,3 +84,9 @@ exp pos0 v3, v2, v1, v0 // GFX940: error: instruction not supported on this GPU + +global_load_dword v[2:3], off lds +// GFX940: error: operands are not valid for this GPU or mode + +scratch_load_dword v2, off lds +// GFX940: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s @@ -4301,3 +4301,72 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] + +global_load_dword v[2:3], off lds +// CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] + +global_load_dword v[2:3], off offset:2048 lds +// CHECK: [0x00,0xa8,0x50,0xdc,0x02,0x00,0x7f,0x00] + +global_load_ubyte v[2:3], off offset:2048 lds +// CHECK: [0x00,0xa8,0x40,0xdc,0x02,0x00,0x7f,0x00] + +global_load_sbyte v[2:3], off offset:2048 lds +// CHECK: [0x00,0xa8,0x44,0xdc,0x02,0x00,0x7f,0x00] + +global_load_ushort v[2:3], off offset:2048 lds +// CHECK: [0x00,0xa8,0x48,0xdc,0x02,0x00,0x7f,0x00] + +global_load_sshort 
v[2:3], off offset:2048 lds +// CHECK: [0x00,0xa8,0x4c,0xdc,0x02,0x00,0x7f,0x00] + +global_load_dword v2, s[4:5] offset:2048 lds +// CHECK: [0x00,0xa8,0x50,0xdc,0x02,0x00,0x04,0x00] + +global_load_ubyte v2, s[4:5] offset:2048 lds +// CHECK: [0x00,0xa8,0x40,0xdc,0x02,0x00,0x04,0x00] + +global_load_sbyte v2, s[4:5] offset:2048 lds +// CHECK: [0x00,0xa8,0x44,0xdc,0x02,0x00,0x04,0x00] + +global_load_ushort v2, s[4:5] offset:2048 lds +// CHECK: [0x00,0xa8,0x48,0xdc,0x02,0x00,0x04,0x00] + +global_load_sshort v2, s[4:5] offset:2048 lds +// CHECK: [0x00,0xa8,0x4c,0xdc,0x02,0x00,0x04,0x00] + +scratch_load_dword v2, off lds +// CHECK: [0x00,0x60,0x50,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_dword v2, off offset:2048 lds +// CHECK: [0x00,0x68,0x50,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_dword v2, off offset:2048 lds +// CHECK: [0x00,0x68,0x50,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_ubyte v2, off offset:2048 lds +// CHECK: [0x00,0x68,0x40,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_sbyte v2, off offset:2048 lds +// CHECK: [0x00,0x68,0x44,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_ushort v2, off offset:2048 lds +// CHECK: [0x00,0x68,0x48,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_sshort v2, off offset:2048 lds +// CHECK: [0x00,0x68,0x4c,0xdc,0x02,0x00,0x7f,0x00] + +scratch_load_dword off, s4 offset:2048 lds +// CHECK: [0x00,0x68,0x50,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_ubyte off, s4 offset:2048 lds +// CHECK: [0x00,0x68,0x40,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_sbyte off, s4 offset:2048 lds +// CHECK: [0x00,0x68,0x44,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_ushort off, s4 offset:2048 lds +// CHECK: [0x00,0x68,0x48,0xdc,0x00,0x00,0x04,0x00] + +scratch_load_sshort off, s4 offset:2048 lds +// CHECK: [0x00,0x68,0x4c,0xdc,0x00,0x00,0x04,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt @@ -246,3 +246,18 @@ # GFX10: s_waitcnt_depctr depctr_va_vdst(1) depctr_va_sdst(1) depctr_vm_vsrc(1) ; encoding: [0x87,0x13,0xa3,0xbf] 0x87,0x13,0xa3,0xbf + +# GFX10: scratch_load_dword off, off offset:1024 lds ; encoding: [0x00,0x64,0x30,0xdc,0x00,0x00,0x7f,0x00] +0x00,0x64,0x30,0xdc,0x00,0x00,0x7f,0x00 + +# GFX10: scratch_load_ubyte off, off offset:1024 lds ; encoding: [0x00,0x64,0x20,0xdc,0x00,0x00,0x7f,0x00] +0x00,0x64,0x20,0xdc,0x00,0x00,0x7f,0x00 + +# GFX10: scratch_load_sbyte off, off offset:1024 lds ; encoding: [0x00,0x64,0x24,0xdc,0x00,0x00,0x7f,0x00] +0x00,0x64,0x24,0xdc,0x00,0x00,0x7f,0x00 + +# GFX10: scratch_load_ushort off, off offset:1024 lds ; encoding: [0x00,0x64,0x28,0xdc,0x00,0x00,0x7f,0x00] +0x00,0x64,0x28,0xdc,0x00,0x00,0x7f,0x00 + +# GFX10: scratch_load_sshort off, off offset:1024 lds ; encoding: [0x00,0x64,0x2c,0xdc,0x00,0x00,0x7f,0x00] +0x00,0x64,0x2c,0xdc,0x00,0x00,0x7f,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -98886,3 +98886,66 @@ # GFX10: s_waitcnt_depctr depctr_va_vdst(14) depctr_va_sdst(6) depctr_vm_vsrc(6) ; encoding: [0x1b,0xed,0xa3,0xbf] 0x1b,0xed,0xa3,0xbf + +# GFX10: global_load_dword v2, s[4:5] offset:1024 lds ; encoding: [0x00,0xa4,0x30,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa4,0x30,0xdc,0x02,0x00,0x04,0x00 + +# GFX10: global_load_dword v2, s[4:5] offset:1024 lds ; encoding: [0x00,0xa4,0x30,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa4,0x30,0xdc,0x02,0x00,0x04,0x00 + +# GFX10: global_load_ubyte v2, s[4:5] offset:1024 lds ; encoding: [0x00,0xa4,0x20,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa4,0x20,0xdc,0x02,0x00,0x04,0x00 + +# GFX10: global_load_sbyte v2, s[4:5] offset:1024 lds ; encoding: [0x00,0xa4,0x24,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa4,0x24,0xdc,0x02,0x00,0x04,0x00 + +# GFX10: 
global_load_ushort v2, s[4:5] offset:1024 lds ; encoding: [0x00,0xa4,0x28,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa4,0x28,0xdc,0x02,0x00,0x04,0x00 + +# GFX10: global_load_sshort v2, s[4:5] offset:1024 lds ; encoding: [0x00,0xa4,0x2c,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa4,0x2c,0xdc,0x02,0x00,0x04,0x00 + +# GFX10: global_load_dword v[2:3], off offset:1024 lds ; encoding: [0x00,0xa4,0x30,0xdc,0x02,0x00,0x7d,0x00] +0x00,0xa4,0x30,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: global_load_ubyte v[2:3], off offset:1024 lds ; encoding: [0x00,0xa4,0x20,0xdc,0x02,0x00,0x7d,0x00] +0x00,0xa4,0x20,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: global_load_sbyte v[2:3], off offset:1024 lds ; encoding: [0x00,0xa4,0x24,0xdc,0x02,0x00,0x7d,0x00] +0x00,0xa4,0x24,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: global_load_ushort v[2:3], off offset:1024 lds ; encoding: [0x00,0xa4,0x28,0xdc,0x02,0x00,0x7d,0x00] +0x00,0xa4,0x28,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: global_load_sshort v[2:3], off offset:1024 lds ; encoding: [0x00,0xa4,0x2c,0xdc,0x02,0x00,0x7d,0x00] +0x00,0xa4,0x2c,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: scratch_load_dword v2, off offset:1024 lds ; encoding: [0x00,0x64,0x30,0xdc,0x02,0x00,0x7d,0x00] +0x00,0x64,0x30,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: scratch_load_ubyte v2, off offset:1024 lds ; encoding: [0x00,0x64,0x20,0xdc,0x02,0x00,0x7d,0x00] +0x00,0x64,0x20,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: scratch_load_sbyte v2, off offset:1024 lds ; encoding: [0x00,0x64,0x24,0xdc,0x02,0x00,0x7d,0x00] +0x00,0x64,0x24,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: scratch_load_ushort v2, off offset:1024 lds ; encoding: [0x00,0x64,0x28,0xdc,0x02,0x00,0x7d,0x00] +0x00,0x64,0x28,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: scratch_load_sshort v2, off offset:1024 lds ; encoding: [0x00,0x64,0x2c,0xdc,0x02,0x00,0x7d,0x00] +0x00,0x64,0x2c,0xdc,0x02,0x00,0x7d,0x00 + +# GFX10: scratch_load_dword off, s4 offset:1024 lds ; encoding: [0x00,0x64,0x30,0xdc,0x00,0x00,0x04,0x00] +0x00,0x64,0x30,0xdc,0x00,0x00,0x04,0x00 + +# GFX10: scratch_load_ubyte off, 
s4 offset:1024 lds ; encoding: [0x00,0x64,0x20,0xdc,0x00,0x00,0x04,0x00] +0x00,0x64,0x20,0xdc,0x00,0x00,0x04,0x00 + +# GFX10: scratch_load_sbyte off, s4 offset:1024 lds ; encoding: [0x00,0x64,0x24,0xdc,0x00,0x00,0x04,0x00] +0x00,0x64,0x24,0xdc,0x00,0x00,0x04,0x00 + +# GFX10: scratch_load_ushort off, s4 offset:1024 lds ; encoding: [0x00,0x64,0x28,0xdc,0x00,0x00,0x04,0x00] +0x00,0x64,0x28,0xdc,0x00,0x00,0x04,0x00 + +# GFX10: scratch_load_sshort off, s4 offset:1024 lds ; encoding: [0x00,0x64,0x2c,0xdc,0x00,0x00,0x04,0x00] +0x00,0x64,0x2c,0xdc,0x00,0x00,0x04,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -115550,3 +115550,72 @@ # CHECK: v_cmpx_t_u32_sdwa s[6:7], v1, sext(v2) src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0xbe,0x7d,0x01,0x86,0x06,0x0e] 0xf9,0x04,0xbe,0x7d,0x01,0x86,0x06,0x0e + +# CHECK: global_load_dword v[2:3], off lds ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] +0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: global_load_dword v[2:3], off offset:2048 lds ; encoding: [0x00,0xa8,0x50,0xdc,0x02,0x00,0x7f,0x00] +0x00,0xa8,0x50,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: global_load_ubyte v[2:3], off offset:2048 lds ; encoding: [0x00,0xa8,0x40,0xdc,0x02,0x00,0x7f,0x00] +0x00,0xa8,0x40,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: global_load_sbyte v[2:3], off offset:2048 lds ; encoding: [0x00,0xa8,0x44,0xdc,0x02,0x00,0x7f,0x00] +0x00,0xa8,0x44,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: global_load_ushort v[2:3], off offset:2048 lds ; encoding: [0x00,0xa8,0x48,0xdc,0x02,0x00,0x7f,0x00] +0x00,0xa8,0x48,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: global_load_sshort v[2:3], off offset:2048 lds ; encoding: [0x00,0xa8,0x4c,0xdc,0x02,0x00,0x7f,0x00] +0x00,0xa8,0x4c,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: global_load_dword v2, s[4:5] offset:2048 lds ; encoding: 
[0x00,0xa8,0x50,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa8,0x50,0xdc,0x02,0x00,0x04,0x00 + +# CHECK: global_load_ubyte v2, s[4:5] offset:2048 lds ; encoding: [0x00,0xa8,0x40,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa8,0x40,0xdc,0x02,0x00,0x04,0x00 + +# CHECK: global_load_sbyte v2, s[4:5] offset:2048 lds ; encoding: [0x00,0xa8,0x44,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa8,0x44,0xdc,0x02,0x00,0x04,0x00 + +# CHECK: global_load_ushort v2, s[4:5] offset:2048 lds ; encoding: [0x00,0xa8,0x48,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa8,0x48,0xdc,0x02,0x00,0x04,0x00 + +# CHECK: global_load_sshort v2, s[4:5] offset:2048 lds ; encoding: [0x00,0xa8,0x4c,0xdc,0x02,0x00,0x04,0x00] +0x00,0xa8,0x4c,0xdc,0x02,0x00,0x04,0x00 + +# CHECK: scratch_load_dword v2, off lds ; encoding: [0x00,0x60,0x50,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x60,0x50,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_dword v2, off offset:2048 lds ; encoding: [0x00,0x68,0x50,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x68,0x50,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_dword v2, off offset:2048 lds ; encoding: [0x00,0x68,0x50,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x68,0x50,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_ubyte v2, off offset:2048 lds ; encoding: [0x00,0x68,0x40,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x68,0x40,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_sbyte v2, off offset:2048 lds ; encoding: [0x00,0x68,0x44,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x68,0x44,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_ushort v2, off offset:2048 lds ; encoding: [0x00,0x68,0x48,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x68,0x48,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_sshort v2, off offset:2048 lds ; encoding: [0x00,0x68,0x4c,0xdc,0x02,0x00,0x7f,0x00] +0x00,0x68,0x4c,0xdc,0x02,0x00,0x7f,0x00 + +# CHECK: scratch_load_dword off, s4 offset:2048 lds ; encoding: [0x00,0x68,0x50,0xdc,0x00,0x00,0x04,0x00] +0x00,0x68,0x50,0xdc,0x00,0x00,0x04,0x00 + +# CHECK: scratch_load_ubyte off, s4 offset:2048 lds ; encoding: [0x00,0x68,0x40,0xdc,0x00,0x00,0x04,0x00] 
+0x00,0x68,0x40,0xdc,0x00,0x00,0x04,0x00 + +# CHECK: scratch_load_sbyte off, s4 offset:2048 lds ; encoding: [0x00,0x68,0x44,0xdc,0x00,0x00,0x04,0x00] +0x00,0x68,0x44,0xdc,0x00,0x00,0x04,0x00 + +# CHECK: scratch_load_ushort off, s4 offset:2048 lds ; encoding: [0x00,0x68,0x48,0xdc,0x00,0x00,0x04,0x00] +0x00,0x68,0x48,0xdc,0x00,0x00,0x04,0x00 + +# CHECK: scratch_load_sshort off, s4 offset:2048 lds ; encoding: [0x00,0x68,0x4c,0xdc,0x00,0x00,0x04,0x00] +0x00,0x68,0x4c,0xdc,0x00,0x00,0x04,0x00