NOTE(review): the line structure of this unified diff was rebuilt from a
whitespace-collapsed copy. Leading indentation of context and removed lines is
a best-effort reconstruction; verify against the tree or apply with
whitespace-insensitive matching (e.g. `patch -l`).
NOTE(review): the SemaOpenCL test is gated with
`REQUIRES: amdgpu-registered-target`, matching the CodeGenOpenCL test added by
this same patch, because its RUN line also requests `-S` for an amdgcn triple.

Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -212,5 +212,30 @@
 BUILTIN(__builtin_r600_recipsqrt_ieee, "dd", "nc")
 BUILTIN(__builtin_r600_recipsqrt_ieeef, "ff", "nc")
 
+//===----------------------------------------------------------------------===//
+// MFMA builtins.
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x1f32, "V32fffV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x1f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x1f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4f16, "V32fV4hV4hV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x4f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x8f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x16f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x4i8, "V32iiiV32iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x4i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_4x4x4i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x8i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x16i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2bf16, "V32fV2sV2sV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x2bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x2bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -163,6 +163,7 @@
       Features["dot4-insts"] = true;
       Features["dot5-insts"] = true;
       Features["dot6-insts"] = true;
+      Features["mai-insts"] = true;
       LLVM_FALLTHROUGH;
     case GK_GFX906:
       Features["dl-insts"] = true;
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -0,0 +1,161 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef float v4f __attribute__((ext_vector_type(4)));
+typedef float v16f __attribute__((ext_vector_type(16)));
+typedef float v32f __attribute__((ext_vector_type(32)));
+typedef half v4h __attribute__((ext_vector_type(4)));
+typedef half v16h __attribute__((ext_vector_type(16)));
+typedef half v32h __attribute__((ext_vector_type(32)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+typedef int v16i __attribute__((ext_vector_type(16)));
+typedef int v32i __attribute__((ext_vector_type(32)));
+typedef short v2s __attribute__((ext_vector_type(2)));
+typedef short v4s __attribute__((ext_vector_type(4)));
+typedef short v16s __attribute__((ext_vector_type(16)));
+typedef short v32s __attribute__((ext_vector_type(32)));
+typedef double v4d __attribute__((ext_vector_type(4)));
+
+
+// CHECK-LABEL: @test_mfma_f32_32x32x1f32
+// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x1f32(global v32f* out, float a, float b, v32f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x1f32
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x1f32(global v16f* out, float a, float b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_4x4x1f32
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_4x4x1f32(global v4f* out, float a, float b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x2f32
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x2f32(global v16f* out, float a, float b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x4f32
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x4f32(global v4f* out, float a, float b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x4f16
+// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 x half> %b, <32 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x4f16(global v32f* out, v4h a, v4h b, v32f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x4f16
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x4f16(global v16f* out, v4h a, v4h b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_4x4x4f16
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_4x4x4f16(global v4f* out, v4h a, v4h b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x8f16
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x8f16(global v16f* out, v4h a, v4h b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x16f16
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x16f16(global v4f* out, v4h a, v4h b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x16f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_i32_32x32x4i8
+// CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %a, i32 %b, <32 x i32> %c, i32 0, i32 0, i32 0)
+void test_mfma_i32_32x32x4i8(global v32i* out, int a, int b, v32i c)
+{
+  *out = __builtin_amdgcn_mfma_i32_32x32x4i8(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_i32_16x16x4i8
+// CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
+void test_mfma_i32_16x16x4i8(global v16i* out, int a, int b, v16i c)
+{
+  *out = __builtin_amdgcn_mfma_i32_16x16x4i8(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_i32_4x4x4i8
+// CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
+void test_mfma_i32_4x4x4i8(global v4i* out, int a, int b, v4i c)
+{
+  *out = __builtin_amdgcn_mfma_i32_4x4x4i8(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_i32_32x32x8i8
+// CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
+void test_mfma_i32_32x32x8i8(global v16i* out, int a, int b, v16i c)
+{
+  *out = __builtin_amdgcn_mfma_i32_32x32x8i8(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_i32_16x16x16i8
+// CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
+void test_mfma_i32_16x16x16i8(global v4i* out, int a, int b, v4i c)
+{
+  *out = __builtin_amdgcn_mfma_i32_16x16x16i8(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x2bf16
+// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x2bf16(global v32f* out, v2s a, v2s b, v32f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x2bf16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x2bf16
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x2bf16(global v16f* out, v2s a, v2s b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x2bf16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_4x4x2bf16
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_4x4x2bf16(global v4f* out, v2s a, v2s b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x2bf16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x4bf16
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x4bf16(global v16f* out, v2s a, v2s b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x4bf16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x8bf16
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x8bf16(global v4f* out, v2s a, v2s b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x8bf16(a, b, c, 0, 0, 0);
+}
+
Index: clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl
@@ -0,0 +1,160 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef float v4f __attribute__((ext_vector_type(4)));
+typedef float v16f __attribute__((ext_vector_type(16)));
+typedef float v32f __attribute__((ext_vector_type(32)));
+typedef half v4h __attribute__((ext_vector_type(4)));
+typedef half v16h __attribute__((ext_vector_type(16)));
+typedef half v32h __attribute__((ext_vector_type(32)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+typedef int v16i __attribute__((ext_vector_type(16)));
+typedef int v32i __attribute__((ext_vector_type(32)));
+typedef short v2s __attribute__((ext_vector_type(2)));
+typedef short v4s __attribute__((ext_vector_type(4)));
+typedef short v16s __attribute__((ext_vector_type(16)));
+typedef short v32s __attribute__((ext_vector_type(32)));
+typedef double v4d __attribute__((ext_vector_type(4)));
+
+
+void test_mfma_f32_32x32x1f32(global v32f* out, float a, float b, v32f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x1f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x1f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x1f32' must be a constant integer}}
+}
+
+void test_mfma_f32_16x16x1f32(global v16f* out, float a, float b, v16f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x1f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x1f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x1f32' must be a constant integer}}
+}
+
+void test_mfma_f32_4x4x1f32(global v4f* out, float a, float b, v4f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x1f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x1f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x1f32' must be a constant integer}}
+}
+
+void test_mfma_f32_32x32x2f32(global v16f* out, float a, float b, v16f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x2f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x2f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x2f32' must be a constant integer}}
+}
+
+void test_mfma_f32_16x16x4f32(global v4f* out, float a, float b, v4f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x4f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x4f32' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x4f32' must be a constant integer}}
+}
+
+void test_mfma_f32_32x32x4f16(global v32f* out, v4h a, v4h b, v32f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x4f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x4f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x4f16' must be a constant integer}}
+}
+
+void test_mfma_f32_16x16x4f16(global v16f* out, v4h a, v4h b, v16f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x4f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x4f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x4f16' must be a constant integer}}
+}
+
+void test_mfma_f32_4x4x4f16(global v4f* out, v4h a, v4h b, v4f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x4f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x4f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x4f16' must be a constant integer}}
+}
+
+void test_mfma_f32_32x32x8f16(global v16f* out, v4h a, v4h b, v16f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x8f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x8f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x8f16' must be a constant integer}}
+}
+
+void test_mfma_f32_16x16x16f16(global v4f* out, v4h a, v4h b, v4f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x16f16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x16f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x16f16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x16f16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x16f16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x16f16' must be a constant integer}}
+}
+
+void test_mfma_i32_32x32x4i8(global v32i* out, int a, int b, v32i c, int d)
+{
+  *out = __builtin_amdgcn_mfma_i32_32x32x4i8(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_32x32x4i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_32x32x4i8(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_32x32x4i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_32x32x4i8(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_32x32x4i8' must be a constant integer}}
+}
+
+void test_mfma_i32_16x16x4i8(global v16i* out, int a, int b, v16i c, int d)
+{
+  *out = __builtin_amdgcn_mfma_i32_16x16x4i8(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_16x16x4i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_16x16x4i8(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_16x16x4i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_16x16x4i8(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_16x16x4i8' must be a constant integer}}
+}
+
+void test_mfma_i32_4x4x4i8(global v4i* out, int a, int b, v4i c, int d)
+{
+  *out = __builtin_amdgcn_mfma_i32_4x4x4i8(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_4x4x4i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_4x4x4i8(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_4x4x4i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_4x4x4i8(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_4x4x4i8' must be a constant integer}}
+}
+
+void test_mfma_i32_32x32x8i8(global v16i* out, int a, int b, v16i c, int d)
+{
+  *out = __builtin_amdgcn_mfma_i32_32x32x8i8(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_32x32x8i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_32x32x8i8(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_32x32x8i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_32x32x8i8(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_32x32x8i8' must be a constant integer}}
+}
+
+void test_mfma_i32_16x16x16i8(global v4i* out, int a, int b, v4i c, int d)
+{
+  *out = __builtin_amdgcn_mfma_i32_16x16x16i8(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_16x16x16i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_16x16x16i8(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_16x16x16i8' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_i32_16x16x16i8(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_i32_16x16x16i8' must be a constant integer}}
+}
+
+void test_mfma_f32_32x32x2bf16(global v32f* out, v2s a, v2s b, v32f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x2bf16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x2bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x2bf16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x2bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x2bf16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x2bf16' must be a constant integer}}
+}
+
+void test_mfma_f32_16x16x2bf16(global v16f* out, v2s a, v2s b, v16f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x2bf16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x2bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x2bf16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x2bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x2bf16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x2bf16' must be a constant integer}}
+}
+
+void test_mfma_f32_4x4x2bf16(global v4f* out, v2s a, v2s b, v4f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x2bf16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x2bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_4x4x2bf16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x2bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_4x4x2bf16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_4x4x2bf16' must be a constant integer}}
+}
+
+void test_mfma_f32_32x32x4bf16(global v16f* out, v2s a, v2s b, v16f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x4bf16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x4bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x4bf16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x4bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x4bf16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_32x32x4bf16' must be a constant integer}}
+}
+
+void test_mfma_f32_16x16x8bf16(global v4f* out, v2s a, v2s b, v4f c, int d)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x8bf16(a, b, c, d, 0, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x8bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x8bf16(a, b, c, 0, d, 0); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x8bf16' must be a constant integer}}
+  *out = __builtin_amdgcn_mfma_f32_16x16x8bf16(a, b, c, 0, 0, d); // expected-error{{argument to '__builtin_amdgcn_mfma_f32_16x16x8bf16' must be a constant integer}}
+}
Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1725,105 +1725,125 @@
 def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;
 
 // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
-def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
-  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
-  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
-  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
-  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
-  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
-  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
-  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
-  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
-  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
-  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
+  Intrinsic<[llvm_v32f32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
+  Intrinsic<[llvm_v16f32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
+  Intrinsic<[llvm_v4f32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
+  Intrinsic<[llvm_v16f32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
+  Intrinsic<[llvm_v4f32_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
+  Intrinsic<[llvm_v32f32_ty],
+            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
+  Intrinsic<[llvm_v16f32_ty],
+            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
+  Intrinsic<[llvm_v4f32_ty],
+            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
+  Intrinsic<[llvm_v16f32_ty],
+            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
+  Intrinsic<[llvm_v4f32_ty],
+            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
+  Intrinsic<[llvm_v32i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
+  Intrinsic<[llvm_v16i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
+  Intrinsic<[llvm_v4i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
+  Intrinsic<[llvm_v16i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
+  Intrinsic<[llvm_v4i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
+  Intrinsic<[llvm_v32f32_ty],
+            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
+  Intrinsic<[llvm_v16f32_ty],
+            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
+  Intrinsic<[llvm_v4f32_ty],
+            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
+  Intrinsic<[llvm_v16f32_ty],
+            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
+  Intrinsic<[llvm_v4f32_ty],
+            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend