Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -687,6 +687,8 @@ def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, AssemblerPredicate<"FeatureFlatGlobalInsts">; +def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">, + AssemblerPredicate<"FeatureFlatScratchInsts">; def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">, AssemblerPredicate<"FeatureAddNoCarryInsts">; Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -80,6 +80,7 @@ MCOperand decodeOperand_SReg_32(unsigned Val) const; MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const; + MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const; MCOperand decodeOperand_SReg_64(unsigned Val) const; MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_128(unsigned Val) const; Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -95,6 +95,7 @@ DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) +DECODE_OPERAND_REG(SReg_32_XEXEC_HI) DECODE_OPERAND_REG(SReg_64) DECODE_OPERAND_REG(SReg_64_XEXEC) DECODE_OPERAND_REG(SReg_128) @@ -365,6 +366,12 @@ return decodeOperand_SReg_32(Val); } +MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI( + unsigned Val) const { + // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI + return decodeOperand_SReg_32(Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } Index: lib/Target/AMDGPU/FLATInstructions.td 
=================================================================== --- lib/Target/AMDGPU/FLATInstructions.td +++ lib/Target/AMDGPU/FLATInstructions.td @@ -54,6 +54,7 @@ bits<1> has_saddr = 0; bits<1> enabled_saddr = 0; bits<7> saddr_value = 0; + bits<1> has_vaddr = 1; // 0 when there is no vaddr operand to encode. bits<1> has_data = 1; bits<1> has_glc = 1; @@ -109,7 +110,7 @@ let Inst{17} = slc; let Inst{24-18} = op; let Inst{31-26} = 0x37; // Encoding. - let Inst{39-32} = vaddr; + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); // Left unset when has_vaddr is 0. let Inst{47-40} = !if(ps.has_data, vdata, ?); let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); @@ -140,18 +141,6 @@ let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); } -multiclass FLAT_Global_Load_Pseudo { - let is_flat_global = 1 in { - def "" : FLAT_Load_Pseudo; - def _SADDR : FLAT_Load_Pseudo; - } -} - -class FLAT_Scratch_Load_Pseudo : - FLAT_Load_Pseudo { - let is_flat_scratch = 1; -} - class FLAT_Store_Pseudo : FLAT_Pseudo< opName, @@ -172,6 +161,13 @@ let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); } +multiclass FLAT_Global_Load_Pseudo { + let is_flat_global = 1 in { + def "" : FLAT_Load_Pseudo; + def _SADDR : FLAT_Load_Pseudo; + } +} + multiclass FLAT_Global_Store_Pseudo { let is_flat_global = 1 in { def "" : FLAT_Store_Pseudo; @@ -179,9 +175,51 @@ } } -class FLAT_Scratch_Store_Pseudo : - FLAT_Store_Pseudo { - let is_flat_scratch = 1; +class FLAT_Scratch_Load_Pseudo : FLAT_Pseudo< + opName, + (outs regClass:$vdst), + !if(EnableSaddr, + (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)), + " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> { + let has_data = 0; + let mayLoad = 1; + let has_saddr = 1; + let enabled_saddr = EnableSaddr; + let has_vaddr = !if(EnableSaddr, 0, 1); // The saddr form has no $vaddr operand. + let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); +} + +class FLAT_Scratch_Store_Pseudo : 
FLAT_Pseudo< + opName, + (outs), + !if(EnableSaddr, + (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)), + " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> { + let mayLoad = 0; + let mayStore = 1; + let has_vdst = 0; + let has_saddr = 1; + let enabled_saddr = EnableSaddr; + let has_vaddr = !if(EnableSaddr, 0, 1); // The saddr form has no $vaddr operand. + + let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); +} + +multiclass FLAT_Scratch_Load_Pseudo { + let is_flat_scratch = 1 in { + def "" : FLAT_Scratch_Load_Pseudo; + def _SADDR : FLAT_Scratch_Load_Pseudo; + } +} + +multiclass FLAT_Scratch_Store_Pseudo { + let is_flat_scratch = 1 in { + def "" : FLAT_Scratch_Store_Pseudo; + def _SADDR : FLAT_Scratch_Store_Pseudo; + } } class FLAT_AtomicNoRet_Pseudo; +defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; +defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>; +defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>; +defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>; +defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>; +defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; +defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; + +defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; +defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; +defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; +defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; +defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; +defm SCRATCH_STORE_DWORDX4 : 
FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; + +} // End SubtargetPredicate = HasFlatScratchInsts + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -873,15 +930,16 @@ defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; -defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; +defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; -defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; +defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; + defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; @@ -909,3 +967,19 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; + +defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; +defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; +defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; +defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; +defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; +defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; +defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; +defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; + +defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; +defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; +defm SCRATCH_STORE_DWORD : 
FLAT_Real_AllAddr_vi <0x1c>; +defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; +defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; +defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -279,6 +279,11 @@ let AllocationPriority = 7; } +def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { + let AllocationPriority = 7; +} + def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 7; @@ -286,7 +291,7 @@ // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { let AllocationPriority = 7; } Index: test/MC/AMDGPU/flat-scratch-instructions.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/flat-scratch-instructions.s @@ -0,0 +1,145 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s + +scratch_load_ubyte v1, v2, off +// GFX9: scratch_load_ubyte v1, v2, off ; encoding: [0x00,0x40,0x40,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_sbyte v1, v2, off +// GFX9: scratch_load_sbyte v1, v2, off ; encoding: [0x00,0x40,0x44,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: instruction not 
supported on this GPU + +scratch_load_ushort v1, v2, off +// GFX9: scratch_load_ushort v1, v2, off ; encoding: [0x00,0x40,0x48,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_sshort v1, v2, off +// GFX9: scratch_load_sshort v1, v2, off ; encoding: [0x00,0x40,0x4c,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_dword v1, v2, off +// GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_dwordx2 v[1:2], v3, off +// GFX9: scratch_load_dwordx2 v[1:2], v3, off ; encoding: [0x00,0x40,0x54,0xdc,0x03,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_dwordx3 v[1:3], v4, off +// GFX9: scratch_load_dwordx3 v[1:3], v4, off ; encoding: [0x00,0x40,0x58,0xdc,0x04,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_dwordx4 v[1:4], v5, off +// GFX9: scratch_load_dwordx4 v[1:4], v5, off ; encoding: [0x00,0x40,0x5c,0xdc,0x05,0x00,0x7f,0x01] +// VI-ERR: instruction not supported on this GPU +// FIXME: VI error should be instruction not supported + +scratch_load_dword v1, v2, off offset:0 +// GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: error: not a valid operand. + +scratch_load_dword v1, v2, off offset:4095 +// GFX9: scratch_load_dword v1, v2, off offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: error: not a valid operand. + +scratch_load_dword v1, v2, off offset:-1 +// GFX9: scratch_load_dword v1, v2, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: error: not a valid operand. + +scratch_load_dword v1, v2, off offset:-4096 +// GFX9: scratch_load_dword v1, v2, off offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x02,0x00,0x7f,0x01] +// VI-ERR: error: not a valid operand. 
+ +scratch_load_dword v1, v2, off offset:4096 +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: not a valid operand. + +scratch_load_dword v1, v2, off offset:-4097 +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: not a valid operand. + +scratch_store_byte v1, v2, off +// GFX9: scratch_store_byte v1, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_short v1, v2, off +// GFX9: scratch_store_short v1, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dword v1, v2, off +// GFX9: scratch_store_dword v1, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dwordx2 v1, v[2:3], off +// GFX9: scratch_store_dwordx2 v1, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dwordx3 v1, v[2:4], off +// GFX9: scratch_store_dwordx3 v1, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dwordx4 v1, v[2:5], off +// GFX9: scratch_store_dwordx4 v1, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dword v1, v2, off offset:12 +// GFX9: scratch_store_dword v1, v2, off offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] +// VI-ERR: error: not a valid operand + +scratch_load_dword v1, off, s1 +// GFX9: scratch_load_dword v1, off, s1 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_load_dword v1, off, s1 offset:32 +// GFX9: scratch_load_dword v1, off, s1 offset:32 ; encoding: [0x20,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] +// VI-ERR: error: not a valid operand + +scratch_store_dword off, v2, s1 +// 
GFX9: scratch_store_dword off, v2, s1 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dword off, v2, s1 offset:12 +// GFX9: scratch_store_dword off, v2, s1 offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] +// VI-ERR: error: not a valid operand + +// FIXME: Should error about multiple offsets +scratch_load_dword v1, v2, s1 +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: invalid operand for instruction + +scratch_load_dword v1, v2, s1 offset:32 +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: not a valid operand + +scratch_store_dword v1, v2, s1 +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: invalid operand for instruction + +scratch_store_dword v1, v2, s1 offset:32 +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: not a valid operand + +scratch_load_dword v1, off, exec_hi +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: invalid operand for instruction + +scratch_store_dword off, v2, exec_hi +// GFX9-ERR: error: invalid operand for instruction +// VI-ERR: error: invalid operand for instruction + +scratch_load_dword v1, off, exec_lo +// GFX9: scratch_load_dword v1, off, exec_lo ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7e,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dword off, v2, exec_lo +// GFX9: scratch_store_dword off, v2, exec_lo ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] +// VI-ERR: instruction not supported on this GPU + +scratch_load_dword v1, off, m0 +// GFX9: scratch_load_dword v1, off, m0 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7c,0x01] +// VI-ERR: instruction not supported on this GPU + +scratch_store_dword off, v2, m0 +// GFX9: scratch_store_dword off, v2, m0 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] +// VI-ERR: instruction not supported on this GPU