Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -599,7 +599,11 @@ "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate<"FeatureCIInsts">; -def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; +def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, + AssemblerPredicate<"FeatureFlatAddressSpace">; + +def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, + AssemblerPredicate<"FeatureFlatGlobalInsts">; def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<"Feature16BitInsts">; Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2604,11 +2604,21 @@ return MatchOperand_ParseFail; Parser.Lex(); + + bool IsMinus = false; + if (getLexer().getKind() == AsmToken::Minus) { + Parser.Lex(); + IsMinus = true; + } + if (getLexer().isNot(AsmToken::Integer)) return MatchOperand_ParseFail; if (getParser().parseAbsoluteExpression(Int)) return MatchOperand_ParseFail; + + if (IsMinus) + Int = -Int; break; } } Index: lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- lib/Target/AMDGPU/FLATInstructions.td +++ lib/Target/AMDGPU/FLATInstructions.td @@ -31,8 +31,6 @@ let VM_CNT = 1; let LGKM_CNT = 1; - let Uses = [EXEC, FLAT_SCR]; // M0 - let UseNamedOperandTable = 1; let hasSideEffects = 0; let SchedRW = [WriteVMEM]; @@ -40,10 +38,16 @@ string Mnemonic = opName; string AsmOperands = asmOps; + bits<1> is_flat_global = 0; + bits<1> is_flat_scratch = 0; + bits<1> has_vdst = 1; bits<1> has_data = 1; bits<1> has_glc = 1; bits<1> glcValue = 0; + + // TODO: M0 if it could possibly access LDS (before gfx9? only)? 
+ let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); } class FLAT_Real op, FLAT_Pseudo ps> : @@ -68,7 +72,10 @@ // Only valid on gfx9 bits<1> lds = 0; // XXX - What does this actually do? - bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + + // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + bits<2> seg = !if(ps.is_flat_global, 0b10, + !if(ps.is_flat_scratch, 0b01, 0)); // Signed offset. Highest bit ignored for flat and treated as 12-bit // unsigned for flat acceses. @@ -81,7 +88,7 @@ // Only valid on GFX9+ let Inst{12-0} = offset; let Inst{13} = lds; - let Inst{15-14} = 0; + let Inst{15-14} = seg; let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); let Inst{17} = slc; @@ -106,6 +113,16 @@ let mayLoad = 1; } +class FLAT_Global_Load_Pseudo : + FLAT_Load_Pseudo { + let is_flat_global = 1; +} + +class FLAT_Scratch_Load_Pseudo : + FLAT_Load_Pseudo { + let is_flat_scratch = 1; +} + class FLAT_Store_Pseudo : FLAT_Pseudo< opName, @@ -119,6 +136,16 @@ let has_vdst = 0; } +class FLAT_Global_Store_Pseudo : + FLAT_Store_Pseudo { + let is_flat_global = 1; +} + +class FLAT_Scratch_Store_Pseudo : + FLAT_Store_Pseudo { + let is_flat_scratch = 1; +} + multiclass FLAT_Atomic_Pseudo< string opName, RegisterClass vdst_rc, @@ -306,6 +333,26 @@ } // End SubtargetPredicate = isCI +let SubtargetPredicate = HasFlatGlobalInsts in { +def GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; +def GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; +def GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; +def GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; +def GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; +def GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; +def GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; +def GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo 
<"global_load_dwordx4", VReg_128>; + +def GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; +def GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; +def GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; +def GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; +def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; +def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; + +} // End SubtargetPredicate = HasFlatGlobalInsts + + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -557,3 +604,18 @@ defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; +def GLOBAL_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, GLOBAL_LOAD_UBYTE>; +def GLOBAL_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, GLOBAL_LOAD_SBYTE>; +def GLOBAL_LOAD_USHORT_vi : FLAT_Real_vi <0x12, GLOBAL_LOAD_USHORT>; +def GLOBAL_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, GLOBAL_LOAD_SSHORT>; +def GLOBAL_LOAD_DWORD_vi : FLAT_Real_vi <0x14, GLOBAL_LOAD_DWORD>; +def GLOBAL_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, GLOBAL_LOAD_DWORDX2>; +def GLOBAL_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, GLOBAL_LOAD_DWORDX4>; +def GLOBAL_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, GLOBAL_LOAD_DWORDX3>; + +def GLOBAL_STORE_BYTE_vi : FLAT_Real_vi <0x18, GLOBAL_STORE_BYTE>; +def GLOBAL_STORE_SHORT_vi : FLAT_Real_vi <0x1a, GLOBAL_STORE_SHORT>; +def GLOBAL_STORE_DWORD_vi : FLAT_Real_vi <0x1c, GLOBAL_STORE_DWORD>; +def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, GLOBAL_STORE_DWORDX2>; +def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, GLOBAL_STORE_DWORDX4>; +def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, GLOBAL_STORE_DWORDX3>; Index: 
lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -42,6 +42,7 @@ void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, @@ -52,6 +53,9 @@ void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printOffsetS13(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -72,6 +72,11 @@ O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff); } +void AMDGPUInstPrinter::printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatDec(static_cast<int16_t>(MI->getOperand(OpNo).getImm())); +} + void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -118,6 +123,16 @@ } } +void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm != 
0) { + O << ((OpNo == 0)? "offset:" : " offset:"); + printS16ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -491,7 +491,7 @@ def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>; -def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>; +def offset_s13 : NamedOperandS13<"OffsetS13", NamedMatchClass<"OffsetS13">>; def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; Index: test/MC/AMDGPU/flat-global.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/flat-global.s @@ -0,0 +1,87 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s + +global_load_ubyte v1, v[3:4] +// GFX9: global_load_ubyte v1, v[3:4] ; encoding: [0x00,0x80,0x40,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + +global_load_sbyte v1, v[3:4] +// GFX9: global_load_sbyte v1, v[3:4] ; encoding: [0x00,0x80,0x44,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + +global_load_ushort v1, v[3:4] +// GFX9: global_load_ushort v1, v[3:4] ; encoding: [0x00,0x80,0x48,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + 
+global_load_sshort v1, v[3:4] +// GFX9: global_load_sshort v1, v[3:4] ; encoding: [0x00,0x80,0x4c,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + +global_load_dword v1, v[3:4] +// GFX9: global_load_dword v1, v[3:4] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + +global_load_dwordx2 v[1:2], v[3:4] +// GFX9: global_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x80,0x54,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + +global_load_dwordx3 v[1:3], v[3:4] +// GFX9: global_load_dwordx3 v[1:3], v[3:4] ; encoding: [0x00,0x80,0x58,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU + +global_load_dwordx4 v[1:4], v[3:4] +// GFX9: global_load_dwordx4 v[1:4], v[3:4] ; encoding: [0x00,0x80,0x5c,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: instruction not supported on this GPU +// FIXME: VI error should be instruction not supported +global_load_dword v1, v[3:4] offset:0 +// GFX9: global_load_dword v1, v[3:4] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: :36: error: not a valid operand. + +global_load_dword v1, v[3:4] offset:4095 +// GFX9: global_load_dword v1, v[3:4] offset:4095 ; encoding: [0xff,0x8f,0x50,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: :36: error: not a valid operand. + +global_load_dword v1, v[3:4] offset:-1 +// GFX9: global_load_dword v1, v[3:4] offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: :36: error: not a valid operand. + +global_load_dword v1, v[3:4] offset:-4096 +// GFX9: global_load_dword v1, v[3:4] offset:-4096 ; encoding: [0x00,0x90,0x50,0xdc,0x03,0x00,0x00,0x01] +// VI-ERR: :36: error: not a valid operand. + +global_load_dword v1, v[3:4] offset:4096 +// GFX9-ERR: :30: error: invalid operand for instruction +// VI-ERR: :36: error: not a valid operand. 
+ +global_load_dword v1, v[3:4] offset:-4097 +// GFX9-ERR: :30: error: invalid operand for instruction +// VI-ERR: :36: error: not a valid operand. + +global_store_byte v[3:4], v1 +// GFX9: global_store_byte v[3:4], v1 ; encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_store_short v[3:4], v1 +// GFX9: global_store_short v[3:4], v1 ; encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_store_dword v[3:4], v1 +// GFX9: global_store_dword v[3:4], v1 ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_store_dwordx2 v[3:4], v[1:2] +// GFX9: global_store_dwordx2 v[3:4], v[1:2] ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_store_dwordx3 v[3:4], v[1:3] +// GFX9: global_store_dwordx3 v[3:4], v[1:3] ; encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_store_dwordx4 v[3:4], v[1:4] +// GFX9: global_store_dwordx4 v[3:4], v[1:4] ; encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_store_dword v[3:4], v1 offset:12 +// GFX9: global_store_dword v[3:4], v1 offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x00,0x00] +// VI-ERR: :37: error: not a valid operand