Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -173,6 +173,12 @@ "Has i16/f16 instructions" >; +def FeatureScalarStores : SubtargetFeature<"scalar-stores", + "HasScalarStores", + "true", + "Has store scalar memory instructions" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -313,7 +319,7 @@ [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, - FeatureInv2PiInlineImm, FeatureSMemRealTime + FeatureInv2PiInlineImm, FeatureSMemRealTime, FeatureScalarStores ] >; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -99,6 +99,7 @@ bool HasSMemRealTime; bool Has16BitInsts; bool HasInv2PiInlineImm; + bool HasScalarStores; bool FlatAddressSpace; bool R600ALUInst; bool CaymanISA; @@ -509,6 +510,10 @@ return HasInv2PiInlineImm; } + bool hasScalarStores() const { + return HasScalarStores; + } + bool enableSIScheduler() const { return EnableSIScheduler; } Index: lib/Target/AMDGPU/SMInstructions.td =================================================================== --- lib/Target/AMDGPU/SMInstructions.td +++ lib/Target/AMDGPU/SMInstructions.td @@ -37,6 +37,7 @@ bits<1> has_sbase = 1; bits<1> has_sdst = 1; + bit has_glc = 0; bits<1> has_offset = 1; bits<1> offset_is_imm = 0; } @@ -55,12 +56,24 @@ bits<7> sbase; bits<7> sdst; bits<32> offset; - bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); + bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); } class SM_Load_Pseudo pattern=[]> : SM_Pseudo { RegisterClass BaseClass; + let mayLoad = 1; + let mayStore = 0; + 
let has_glc = 1; +} + +class SM_Store_Pseudo pattern = []> + : SM_Pseudo { + RegisterClass BaseClass; + RegisterClass SrcClass; + let mayLoad = 0; + let mayStore = 1; + let has_glc = 1; } multiclass SM_Pseudo_Loads { def _IMM : SM_Load_Pseudo { + (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc), + " $sdst, $sbase, $offset$glc", []> { let offset_is_imm = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; + let has_glc = 1; } + def _SGPR : SM_Load_Pseudo { + (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc), + " $sdst, $sbase, $offset$glc", []> { let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; + let has_glc = 1; + } +} + +multiclass SM_Pseudo_Stores { + // Store instructions cannot use IMM=0, and OFFSET is anything other + // than M0. NOTE(review): ambiguous wording; presumably "with IMM=0 the OFFSET must be M0" - TODO confirm. + def _SGPR : SM_Store_Pseudo { + let BaseClass = baseClass; + let SrcClass = srcClass; + let PseudoInstr = opName # "_SGPR"; } } @@ -139,6 +169,23 @@ "s_buffer_load_dwordx16", SReg_128, SReg_512 >; +defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0>; +defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64>; +defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>; + +defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores < + "s_buffer_store_dword", SReg_128, SReg_32_XM0 +>; + +defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores < + "s_buffer_store_dwordx2", SReg_128, SReg_64 +>; + +defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores < + "s_buffer_store_dwordx4", SReg_128, SReg_128 +>; + + def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>; def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>; @@ -179,13 +226,13 @@ // 1. IMM offset def : Pat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM") $sbase, $offset)) + (vt (!cast(Instr#"_IMM") $sbase, $offset, 0)) >; // 2. 
SGPR offset def : Pat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_SGPR") $sbase, $offset)) + (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0)) >; } @@ -210,13 +257,13 @@ // 1. Offset as an immediate def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), - (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset) + (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0) >; // 2. Offset loaded in an 32bit SGPR def : Pat < (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), - (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset) + (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0) >; } // End let AddedComplexity = 100 @@ -228,7 +275,7 @@ // 1. Offset as 20bit DWORD immediate def : Pat < (SIload_constant v4i32:$sbase, IMM20bit:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset), 0) >; def : Pat < @@ -263,15 +310,22 @@ let Inst{31-27} = 0x18; //encoding } +// FIXME: Assembler should reject trying to use glc on SMRD +// instructions on SI. 
multiclass SM_Real_Loads_si op, string ps, SM_Load_Pseudo immPs = !cast(ps#_IMM), SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { + def _IMM_si : SMRD_Real_si { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc); } + + // FIXME: The operand name $offset is inconsistent with $soff used + // in the pseudo def _SGPR_si : SMRD_Real_si { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); } + } defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">; @@ -297,6 +351,7 @@ : SM_Real , SIMCInstr , Enc64 { + bit glc; let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; @@ -304,10 +359,8 @@ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); - // glc is only applicable to scalar stores, which are not yet - // implemented. - let Inst{16} = 0; // glc bit - let Inst{17} = imm; + let Inst{16} = !if(ps.has_glc, glc, ?); + let Inst{17} = imm; let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?); @@ -317,10 +370,19 @@ SM_Load_Pseudo immPs = !cast(ps#_IMM), SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { def _IMM_vi : SMEM_Real_vi { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc); } def _SGPR_vi : SMEM_Real_vi { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + } +} + +multiclass SM_Real_Stores_vi op, string ps, + SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { + // FIXME: The operand name $offset is inconsistent with $soff used + // in the pseudo + def _SGPR_vi : SMEM_Real_vi { + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, 
GLC:$glc); } } @@ -335,6 +397,14 @@ defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">; defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">; +defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">; +defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">; +defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">; + +defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">; +defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">; +defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">; + def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>; def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>; def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>; @@ -358,7 +428,7 @@ let AssemblerPredicates = [isCIOnly]; let DecoderNamespace = "CI"; - let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset); + let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc); let LGKM_CNT = ps.LGKM_CNT; let SMRD = ps.SMRD; @@ -410,7 +480,7 @@ class SMRD_Pattern_ci : Pat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM_ci") $sbase, $offset))> { + (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { let Predicates = [isCIOnly]; } @@ -422,7 +492,7 @@ def : Pat < (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), - (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)> { + (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> { let Predicates = [isCI]; // should this be isCIOnly? 
} Index: test/CodeGen/AMDGPU/coalescer-subreg-join.mir =================================================================== --- test/CodeGen/AMDGPU/coalescer-subreg-join.mir +++ test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -46,10 +46,10 @@ %0 = COPY %sgpr2_sgpr3 %1 = COPY %vgpr2 %2 = COPY %vgpr3 - %3 = S_LOAD_DWORDX8_IMM %0, 0 - %4 = S_LOAD_DWORDX4_IMM %0, 12 - %5 = S_LOAD_DWORDX8_IMM %0, 16 - %6 = S_LOAD_DWORDX4_IMM %0, 28 + %3 = S_LOAD_DWORDX8_IMM %0, 0, 0 + %4 = S_LOAD_DWORDX4_IMM %0, 12, 0 + %5 = S_LOAD_DWORDX8_IMM %0, 16, 0 + %6 = S_LOAD_DWORDX4_IMM %0, 28, 0 undef %7.sub0 = S_MOV_B32 212739 %20 = COPY %7 %11 = COPY %20 Index: test/MC/AMDGPU/smem.s =================================================================== --- test/MC/AMDGPU/smem.s +++ test/MC/AMDGPU/smem.s @@ -13,3 +13,27 @@ s_memrealtime s[4:5] // VI: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xc0,0x00,0x00,0x00,0x00] // NOSI: error: instruction not supported on this GPU + +// FIXME: Should error that the instruction is not supported on this GPU +s_store_dword s1, s[2:3], 0xfc +// VI: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00] +// NOSI: error: instruction not supported on this GPU + +s_store_dword s1, s[2:3], 0xfc glc +// VI: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00] +// NOSI: error: invalid operand for instruction + +s_store_dword s1, s[2:3], s4 +// VI: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00] +// NOSI: error: instruction not supported on this GPU + +s_store_dword s1, s[2:3], s4 glc +// VI: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00] +// NOSI: error: invalid operand for instruction + +// FIXME: Should error on SI instead of silently ignoring glc +s_load_dword s1, s[2:3], 0xfc glc +// VI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00] + +s_load_dword s1, s[2:3], s4 glc +// VI: s_load_dword s1, s[2:3], 
s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] Index: test/MC/AMDGPU/smrd-err.s =================================================================== --- test/MC/AMDGPU/smrd-err.s +++ test/MC/AMDGPU/smrd-err.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s s_load_dwordx4 s[100:103], s[2:3], s4