# Patch summary (commentary only; this text sits before the first "diff --git" header and is not part of any hunk).
#
# llvm/lib/Target/AMDGPU/BUFInstructions.td
#   - BUFFER_WBL2 pseudo: sets has_glc = 1 and has_sccb = 1, and takes one input operand, CPol_0:$cpol, printed via AsmOperands = "$cpol".
#   - BUFFER_INVL2 pseudo: SubtargetPredicate narrowed to isGFX90AOnly.
#   - New BUFFER_INV pseudo (SubtargetPredicate = isGFX940Plus) with the same $cpol operand setup as BUFFER_WBL2.
#   - New class MUBUF_Real_gfx940: AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9", AsmString built from the pseudo's mnemonic and operands with "$tfe" removed, Inst{55} = acc.
#   - multiclass MUBUF_Real_vi_gfx90a: the unconditional _gfx90a def is replaced by two foreach-guarded alternatives; the second pins its _gfx90a def to isGFX90AOnly and also emits a _gfx940 def.
#   - BUFFER_WBL2_gfx90a: AsmString is just the mnemonic ("drop flags") and both predicates become isGFX90AOnly.
#   - New real instructions under isGFX940Plus: BUFFER_WBL2_gfx940 (op 0x28) and BUFFER_INV_gfx940 (op 0x29).
#
# llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
#   - The BuildMI for BUFFER_WBL2 now appends an immediate operand AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1; the added source comment says this indicates system scope.
#
# llvm/test/MC/AMDGPU/gfx940_asm_features.s
#   - Adds assembler checks for buffer_wbl2 and buffer_inv with sc0 / sc1 / sc0 sc1, and for buffer_atomic_{add_f32,pk_add_f16,add_f64,max_f64,min_f64} with sc1.
# llvm/test/MC/AMDGPU/gfx940_err.s
#   - Adds expected errors: buffer_invl2 is "not supported on this GPU"; buffer_wbl2 glc and buffer_wbl2 scc are "invalid operand for instruction".
# llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
#   - Adds disassembler checks using the same byte encodings as the new assembler tests.
#
# NOTE(review): the patch text below has its original line breaks collapsed, so hunk line counts cannot be checked from this copy.
# NOTE(review): some TableGen template-argument lists look truncated (e.g. "class MUBUF_Real_gfx940 op, MUBUF_Pseudo ps>" has a closing ">" with no opening "<", and "BoolToList.ret" has no argument) - presumably lost in extraction; confirm against the original change before applying.
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1175,8 +1175,13 @@ let SubtargetPredicate = isGFX90APlus in { def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> { + let has_glc = 1; + let has_sccb = 1; + let InOperandList = (ins CPol_0:$cpol); + let AsmOperands = "$cpol"; } def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> { + let SubtargetPredicate = isGFX90AOnly; } defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; @@ -1184,6 +1189,14 @@ defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; } // End SubtargetPredicate = isGFX90APlus +def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { + let SubtargetPredicate = isGFX940Plus; + let has_glc = 1; + let has_sccb = 1; + let InOperandList = (ins CPol_0:$cpol); + let AsmOperands = "$cpol"; +} + let SubtargetPredicate = isGFX10Plus in { def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">; def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">; @@ -2366,9 +2379,28 @@ let Inst{55} = acc; } +class MUBUF_Real_gfx940 op, MUBUF_Pseudo ps> : + MUBUF_Real_Base_vi { + let AssemblerPredicate = isGFX940Plus; + let DecoderNamespace = "GFX9"; + let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands); + + let Inst{55} = acc; +} + multiclass MUBUF_Real_vi_gfx90a op, MUBUF_Pseudo ps> { def _vi : MUBUF_Real_vi; - def _gfx90a : MUBUF_Real_gfx90a; + + foreach _ = BoolToList.ret in + def _gfx90a : MUBUF_Real_gfx90a; + + foreach _ = BoolToList.ret in { + def _gfx90a : MUBUF_Real_gfx90a { + let SubtargetPredicate = isGFX90AOnly; + let AssemblerPredicate = isGFX90AOnly; + } + def _gfx940 : MUBUF_Real_gfx940; + } } multiclass MUBUF_Real_AllAddr_vi op> { @@ -2558,9 +2590,17 @@ } // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = 
isGFX90APlus def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> { + let AsmString = BUFFER_WBL2.Mnemonic; // drop flags + let AssemblerPredicate = isGFX90AOnly; + let SubtargetPredicate = isGFX90AOnly; } def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>; +let SubtargetPredicate = isGFX940Plus in { +def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>; +def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>; +} + class MTBUF_Real_Base_vi op, MTBUF_Pseudo ps, int Enc> : MTBUF_Real, Enc64, diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -1360,7 +1360,9 @@ // to initiate writeback of any dirty cache lines of earlier writes by the // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the // writeback has completed. - BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) + // Set SC bits to indicate system scope. + .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1); // Followed by same as GFX7, which will ensure the necessary "S_WAITCNT // vmcnt(0)" needed by the "BUFFER_WBL2". 
Changed = true; diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s --- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s @@ -149,6 +149,33 @@ // GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00] v_mov_b64 v[2:3], 0x64 +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00] +buffer_wbl2 sc1 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00] +buffer_wbl2 sc0 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00] +buffer_wbl2 sc0 sc1 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00] +buffer_inv sc0 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00] +buffer_inv sc1 + +// NOT-GFX940: error: instruction not supported on this GPU +// GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00] +buffer_inv sc0 sc1 + // NOT-GFX940: error: invalid operand for instruction // GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_swap v5, off, s[8:11], s3 sc0 @@ -224,3 +251,28 @@ // GFX10: error: instruction not supported on this GPU // GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00] global_atomic_max_f64 v[0:1], v[2:3], off sc1 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not 
supported on this GPU +// GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03] +buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03] +buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03] +buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03] +buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 + +// GFX90A: error: invalid operand for instruction +// GFX10: error: instruction not supported on this GPU +// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] +buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s --- a/llvm/test/MC/AMDGPU/gfx940_err.s +++ b/llvm/test/MC/AMDGPU/gfx940_err.s @@ -31,6 +31,9 @@ v_mov_b64_sdwa v[2:3], v[4:5] // GFX940: error: sdwa variant of this instruction is not supported +buffer_invl2 +// GFX940: error: instruction not supported on this GPU + global_load_dword v2, v[2:3], off glc // GFX940: error: invalid operand for instruction @@ -48,3 +51,9 @@ buffer_atomic_swap v5, off, s[8:11], s3 slc // GFX940: error: invalid operand for instruction + +buffer_wbl2 glc +// GFX940: error: invalid operand for instruction + +buffer_wbl2 scc +// GFX940: error: invalid operand for instruction diff --git 
a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt @@ -102,6 +102,24 @@ # GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00] 0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00 +# GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00] +0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00 + +# GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00] +0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00 + +# GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00] +0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00 + +# GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00] +0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00 + +# GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00] +0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00 + +# GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00] +0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00 + # GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03] 0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03 @@ -149,3 +167,18 @@ # GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00 + +# GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03] +0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03 + +# GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03] +0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03 + +# GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03] +0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03 + +# GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 
sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03] +0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03 + +# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] +0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03