diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1219,6 +1219,10 @@ Predicate<"Subtarget->hasGFX90AInsts()">, AssemblerPredicate<(all_of FeatureGFX90AInsts)>; +def isGFX908orGFX90A : + Predicate<"Subtarget->hasMAIInsts()">, + AssemblerPredicate<(all_of FeatureMAIInsts)>; + def isGFX8GFX9 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1384,18 +1384,21 @@ // VI //===----------------------------------------------------------------------===// -class FLAT_Real_vi op, FLAT_Pseudo ps> : +class FLAT_Real_vi op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : FLAT_Real , SIMCInstr { let AssemblerPredicate = isGFX8GFX9; let DecoderNamespace = "GFX8"; - let Inst{25} = !if(ps.has_sccb, sccb, ps.sccbValue); + let Inst{25} = !if(has_sccb, sccb, ps.sccbValue); + let AsmString = ps.Mnemonic # + !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); } -multiclass FLAT_Real_AllAddr_vi op> { - def _vi : FLAT_Real_vi(NAME)>; - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR")>; +multiclass FLAT_Real_AllAddr_vi op, + bit has_sccb = !cast(NAME).has_sccb> { + def _vi : FLAT_Real_vi(NAME), has_sccb>; + def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb>; } def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; @@ -1423,15 +1426,17 @@ def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; -multiclass FLAT_Real_Atomics_vi op, FLAT_Pseudo ps> { - def _vi : FLAT_Real_vi(ps.PseudoInstr)>; - def _RTN_vi : FLAT_Real_vi(ps.PseudoInstr # "_RTN")>; +multiclass FLAT_Real_Atomics_vi op, FLAT_Pseudo ps, + bit has_sccb = !cast(NAME).has_sccb> { + def _vi : FLAT_Real_vi(ps.PseudoInstr), has_sccb>; + def _RTN_vi : FLAT_Real_vi(ps.PseudoInstr # "_RTN"), has_sccb>; } -multiclass FLAT_Global_Real_Atomics_vi op> : - FLAT_Real_AllAddr_vi { - def _RTN_vi : FLAT_Real_vi (NAME#"_RTN")>; - def _SADDR_RTN_vi : FLAT_Real_vi (NAME#"_SADDR_RTN")>; +multiclass FLAT_Global_Real_Atomics_vi op, + bit has_sccb = !cast(NAME).has_sccb> : + FLAT_Real_AllAddr_vi { + def _RTN_vi : FLAT_Real_vi (NAME#"_RTN"), has_sccb>; + def _SADDR_RTN_vi : FLAT_Real_vi (NAME#"_SADDR_RTN"), has_sccb>; } @@ -1538,14 +1543,19 @@ defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; -let SubtargetPredicate = isGFX90APlus in { - defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64>; - defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64>; - defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64>; - defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f>; - defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50>; - defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51>; -} // End SubtargetPredicate = isGFX90APlus +let SubtargetPredicate = isGFX908orGFX90A in { +defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; +defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; +} + +let SubtargetPredicate = isGFX90AOnly in { + defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>; + defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>; + defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>; + defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>; + defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>; + defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; +} // End SubtargetPredicate = isGFX90AOnly //===----------------------------------------------------------------------===// // GFX10. @@ -1752,10 +1762,3 @@ defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; - -let SubtargetPredicate = HasAtomicFaddInsts in { - -defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d>; -defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e>; - -} // End SubtargetPredicate = HasAtomicFaddInsts diff --git a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s --- a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s @@ -600,10 +600,6 @@ // GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00] flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc -// NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc scc ; encoding: [0xff,0x0f,0x3d,0xdf,0x00,0x02,0x00,0x00] -flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc scc - // NOT-GFX90A: error: instruction not supported on this GPU // GFX90A: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x3e,0xdd,0x00,0x02,0x00,0x00] flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 slc @@ -1023,3 +1019,30 @@ // GFX90A: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc ; encoding: [0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00] // NOT-GFX90A: error: instruction not supported on this GPU flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc + +// GFX90A: global_atomic_add v[2:3], v5, off scc ; encoding: [0x00,0x80,0x08,0xdf,0x02,0x05,0x7f,0x00] +// NOT-GFX90A: error: failed parsing operand. +global_atomic_add v[2:3], v5, off scc + +// GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x7f,0x00] +// GFX908: error: operands are not valid for this GPU or mode +// GFX1010: error: instruction not supported on this GPU +global_atomic_add_f32 v0, v[0:1], v2, off glc + +// GFX90A: global_atomic_add_f32 v[0:1], v2, off ; encoding: [0x00,0x80,0x34,0xdd,0x00,0x02,0x7f,0x00] +// GFX1010: error: instruction not supported on this GPU +global_atomic_add_f32 v[0:1], v2, off + +// GFX90A: global_atomic_add_f32 v0, v2, s[0:1] ; encoding: [0x00,0x80,0x34,0xdd,0x00,0x02,0x00,0x00] +// GFX1010: error: instruction not supported on this GPU +global_atomic_add_f32 v0, v2, s[0:1] + +// GFX90A: global_atomic_add_f32 v1, v0, v2, s[0:1] glc +// GFX908: error: operands are not valid for this GPU or mode +// GFX1010: error: instruction not supported on this GPU +global_atomic_add_f32 v1, v0, v2, s[0:1] glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x00,0x01] + +// GFX908: error: operands are not valid for this GPU or mode +// GFX1010: error: instruction not supported on this GPU +// GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x39,0xdd,0x00,0x02,0x7f,0x00] +global_atomic_pk_add_f16 v0, v[0:1], v2, off glc diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -194,3 +194,39 @@ image_sample_b v[0:3], v[0:1], s[4:11], s[16:19] dmask:0xf // GFX90A: error: instruction not supported on this GPU + +global_atomic_add_f32 v0, v[0:1], v2, off glc scc +// GFX90A: error: invalid operand for instruction + +global_atomic_add_f32 v[0:1], v2, off scc +// GFX90A: error: invalid operand for instruction + +global_atomic_add_f32 v0, v2, s[0:1] scc +// GFX90A: error: invalid operand for instruction + +global_atomic_add_f32 v1, v0, v2, s[0:1] glc scc +// GFX90A: error: invalid operand for instruction + +global_atomic_pk_add_f16 v0, v[0:1], v2, off glc scc +// GFX90A: error: invalid operand for instruction + +flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc scc +// GFX90A: error: invalid operand for instruction + +flat_atomic_add_f64 v[0:1], v[2:3] scc +// GFX90A: error: invalid operand for instruction + +flat_atomic_min_f64 v[0:1], v[2:3] scc +// GFX90A: error: invalid operand for instruction + +flat_atomic_max_f64 v[0:1], v[2:3] scc +// GFX90A: error: invalid operand for instruction + +global_atomic_add_f64 v[0:1], v[2:3], off scc +// GFX90A: error: invalid operand for instruction + +global_atomic_min_f64 v[0:1], v[2:3], off scc +// GFX90A: error: invalid operand for instruction + +global_atomic_max_f64 v[0:1], v[2:3], off scc +// GFX90A: error: invalid operand for instruction diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt @@ -436,8 +436,8 @@ # GFX90A: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x3c,0xdd,0x00,0x02,0x00,0x00] 0xff,0x0f,0x3c,0xdd,0x00,0x02,0x00,0x00 -# GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc scc ; encoding: [0xff,0x0f,0x3d,0xdf,0x00,0x02,0x00,0x00] -0xff,0x0f,0x3d,0xdf,0x00,0x02,0x00,0x00 +# GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00] +0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00 # GFX90A: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x3c,0xdd,0xfe,0x02,0x00,0x00] 0xff,0x0f,0x3c,0xdd,0xfe,0x02,0x00,0x00