diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -3,6 +3,7 @@ altivec.h ammintrin.h amxintrin.h + amxintrin_experiment.h arm_acle.h arm_cmse.h armintr.h diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h --- a/clang/lib/Headers/amxintrin.h +++ b/clang/lib/Headers/amxintrin.h @@ -221,58 +221,5 @@ #define _tile_dpbf16ps(dst, src0, src1) \ __builtin_ia32_tdpbf16ps((dst), (src0), (src1)) -#define __DEFAULT_FN_ATTRS_INT8 \ - __attribute__((__always_inline__, __nodebug__, __target__("amx-int8"))) - -typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 -_tile_loadd_internal(unsigned short m, unsigned short n, const void *base, - __SIZE_TYPE__ stride) { - return __builtin_ia32_tileloadd64_internal(m, n, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 -_tile_dpbssd_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_tdpbssd_internal(m, n, k, dst, src1, src2); -} - -static __inline__ void __DEFAULT_FN_ATTRS_INT8 -_tile_stored_internal(unsigned short m, unsigned short n, void *base, - __SIZE_TYPE__ stride, _tile1024i tile) { - return __builtin_ia32_tilestored64_internal(m, n, base, - (__SIZE_TYPE__)(stride), tile); -} - -typedef struct __tile1024i_str { - const unsigned short row; - const unsigned short col; - _tile1024i tile; -} __tile1024i; - -__DEFAULT_FN_ATTRS_TILE -static void __tile_loadd(__tile1024i *dst, const void *base, - __SIZE_TYPE__ stride) { - dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride); -} - -__DEFAULT_FN_ATTRS_INT8 -static void __tile_dpbssd(__tile1024i *dst, __tile1024i src1, - __tile1024i src2) { - dst->tile = _tile_dpbssd_internal(src1.row, src2.col, src1.col, dst->tile, - src1.tile, 
src2.tile); -} - -__DEFAULT_FN_ATTRS_TILE -static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { - _tile_stored_internal(src.row, src.col, base, stride, src.tile); -} - -__DEFAULT_FN_ATTRS_TILE -static void __tile_zero(__tile1024i *dst) { - dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col); -} - #endif /* __x86_64__ */ #endif /* __AMXINTRIN_H */ diff --git a/clang/lib/Headers/amxintrin_experiment.h b/clang/lib/Headers/amxintrin_experiment.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/amxintrin_experiment.h @@ -0,0 +1,75 @@ +/*===---------- amxintrin_experiment.h - AMX intrinsics -*- C/C++ -*---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +/* + * This file is an experimental interface for the new AMX programming model. 
+ */ + +#ifndef __AMXINTRIN_EXPERIMENT_H +#define __AMXINTRIN_EXPERIMENT_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS_TILE \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-tile"))) + +#define __DEFAULT_FN_ATTRS_INT8 \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-int8"))) + +typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 +_tile_loadd_internal(unsigned short m, unsigned short n, const void *base, + __SIZE_TYPE__ stride) { + return __builtin_ia32_tileloadd64_internal(m, n, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 +_tile_dpbssd_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdpbssd_internal(m, n, k, dst, src1, src2); +} + +static __inline__ void __DEFAULT_FN_ATTRS_INT8 +_tile_stored_internal(unsigned short m, unsigned short n, void *base, + __SIZE_TYPE__ stride, _tile1024i tile) { + return __builtin_ia32_tilestored64_internal(m, n, base, + (__SIZE_TYPE__)(stride), tile); +} + +typedef struct __tile1024i_str { + const unsigned short row; + const unsigned short col; + _tile1024i tile; +} __tile1024i; + +__DEFAULT_FN_ATTRS_TILE +static void __tile_loadd(__tile1024i *dst, const void *base, + __SIZE_TYPE__ stride) { + dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride); +} + +__DEFAULT_FN_ATTRS_INT8 +static void __tile_dpbssd(__tile1024i *dst, __tile1024i src1, + __tile1024i src2) { + dst->tile = _tile_dpbssd_internal(src1.row, src2.col, src1.col, dst->tile, + src1.tile, src2.tile); +} + +__DEFAULT_FN_ATTRS_TILE +static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { + _tile_stored_internal(src.row, src.col, base, stride, src.tile); +} + +__DEFAULT_FN_ATTRS_TILE +static void __tile_zero(__tile1024i *dst) { + dst->tile = __builtin_ia32_tilezero_internal(dst->row, 
dst->col); +} + +#endif /* __x86_64__ */ +#endif /* __AMXINTRIN_EXPERIMENT_H */ diff --git a/clang/test/CodeGen/X86/amx_api.c b/clang/test/CodeGen/X86/amx_api.c --- a/clang/test/CodeGen/X86/amx_api.c +++ b/clang/test/CodeGen/X86/amx_api.c @@ -2,6 +2,7 @@ // RUN: -target-feature +amx-bf16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK #include <immintrin.h> +#include <amxintrin_experiment.h> char buf[1024]; #define STRIDE 32