Factor the AMDGPU MFMA intrinsic definitions into one TableGen class.

AMDGPUMfmaIntrinsic takes the destination/accumulator type (DestTy) and the
type shared by the srcA and srcB operands (SrcABTy). Each of the 27 MFMA
intrinsics touched here becomes a one-line instantiation of that class; the
type pair on every new def is the one its removed hand-written signature used.

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1882,146 +1882,35 @@
 
 def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
 
-// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
-def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
-  Intrinsic<[llvm_v32f32_ty],
-            [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+// llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
+class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
+  GCCBuiltin<!subst("int", "__builtin", NAME)>,
+  Intrinsic<[DestTy],
+            [SrcABTy, SrcABTy, DestTy,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
             [IntrConvergent, IntrNoMem, IntrWillReturn,
              ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
 
-def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
-  Intrinsic<[llvm_v32f32_ty],
-            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
-  Intrinsic<[llvm_v32i32_ty],
-            [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
-  Intrinsic<[llvm_v16i32_ty],
-            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
-  Intrinsic<[llvm_v4i32_ty],
-            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
-  Intrinsic<[llvm_v16i32_ty],
-            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
-  Intrinsic<[llvm_v4i32_ty],
-            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
-  Intrinsic<[llvm_v32f32_ty],
-            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
+def int_amdgcn_mfma_f32_32x32x1f32  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_float_ty>;
+def int_amdgcn_mfma_f32_16x16x1f32  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
+def int_amdgcn_mfma_f32_4x4x1f32    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_float_ty>;
+def int_amdgcn_mfma_f32_32x32x2f32  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
+def int_amdgcn_mfma_f32_16x16x4f32  : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_float_ty>;
+def int_amdgcn_mfma_f32_32x32x4f16  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4f16_ty>;
+def int_amdgcn_mfma_f32_16x16x4f16  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
+def int_amdgcn_mfma_f32_4x4x4f16    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4f16_ty>;
+def int_amdgcn_mfma_f32_32x32x8f16  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
+def int_amdgcn_mfma_f32_16x16x16f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4f16_ty>;
+def int_amdgcn_mfma_i32_32x32x4i8   : AMDGPUMfmaIntrinsic<llvm_v32i32_ty, llvm_i32_ty>;
+def int_amdgcn_mfma_i32_16x16x4i8   : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
+def int_amdgcn_mfma_i32_4x4x4i8     : AMDGPUMfmaIntrinsic<llvm_v4i32_ty,  llvm_i32_ty>;
+def int_amdgcn_mfma_i32_32x32x8i8   : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
+def int_amdgcn_mfma_i32_16x16x16i8  : AMDGPUMfmaIntrinsic<llvm_v4i32_ty,  llvm_i32_ty>;
+def int_amdgcn_mfma_f32_32x32x2bf16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v2i16_ty>;
+def int_amdgcn_mfma_f32_16x16x2bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
+def int_amdgcn_mfma_f32_4x4x2bf16   : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v2i16_ty>;
+def int_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
+def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v2i16_ty>;
 
 //===----------------------------------------------------------------------===//
 // gfx90a intrinsics
@@ -2033,54 +1922,14 @@
 def int_amdgcn_flat_atomic_fmin : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
 def int_amdgcn_flat_atomic_fmax : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
 
-def int_amdgcn_mfma_f32_32x32x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16_1k">,
-  Intrinsic<[llvm_v32f32_ty],
-            [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v32f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4bf16_1k">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
+def int_amdgcn_mfma_f32_32x32x4bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>;
+def int_amdgcn_mfma_f32_16x16x4bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
+def int_amdgcn_mfma_f32_4x4x4bf16_1k    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4i16_ty>;
+def int_amdgcn_mfma_f32_32x32x8bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
+def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4i16_ty>;
 
-def int_amdgcn_mfma_f32_4x4x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4bf16_1k">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_32x32x8bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8bf16_1k">,
-  Intrinsic<[llvm_v16f32_ty],
-            [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v16f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f32_16x16x16bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16bf16_1k">,
-  Intrinsic<[llvm_v4f32_ty],
-            [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v4f32_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f64_16x16x4f64 : GCCBuiltin<"__builtin_amdgcn_mfma_f64_16x16x4f64">,
-  Intrinsic<[llvm_v4f64_ty],
-            [llvm_double_ty, llvm_double_ty, llvm_v4f64_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-
-def int_amdgcn_mfma_f64_4x4x4f64 : GCCBuiltin<"__builtin_amdgcn_mfma_f64_4x4x4f64">,
-  Intrinsic<[llvm_double_ty],
-            [llvm_double_ty, llvm_double_ty, llvm_double_ty,
-             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrConvergent, IntrNoMem, IntrWillReturn,
-             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
+def int_amdgcn_mfma_f64_16x16x4f64      : AMDGPUMfmaIntrinsic<llvm_v4f64_ty,  llvm_double_ty>;
+def int_amdgcn_mfma_f64_4x4x4f64        : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;
 
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend