diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst --- a/llvm/docs/BitCodeFormat.rst +++ b/llvm/docs/BitCodeFormat.rst @@ -1134,6 +1134,22 @@ The ``HALF`` record (code 10) adds a ``half`` (16-bit floating point) type to the type table. +TYPE_CODE_F8E5M2 Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[F8E5M2]`` + +The ``F8E5M2`` record (code 26) adds an ``F8E5M2`` (8-bit floating point) type to +the type table. + +TYPE_CODE_F8E4M3 Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[F8E4M3]`` + +The ``F8E4M3`` record (code 27) adds an ``F8E4M3`` (8-bit floating point) type to +the type table. + TYPE_CODE_BFLOAT Record ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -165,7 +165,9 @@ LLVMTokenTypeKind, /**< Tokens */ LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */ LLVMBFloatTypeKind, /**< 16 bit brain floating point type */ - LLVMX86_AMXTypeKind /**< X86 AMX */ + LLVMX86_AMXTypeKind, /**< X86 AMX */ + LLVMFloatE4M3TypeKind, /**< 8 bit floating point with 3 bit mantissa */ + LLVMFloatE5M2TypeKind /**< 8 bit floating point with 2 bit mantissa */ } LLVMTypeKind; typedef enum { @@ -1191,6 +1193,16 @@ * @{ */ +/** + * Obtain an 8-bit E4M3 floating point type from a context. + */ +LLVMTypeRef LLVMFloatE4M3TypeInContext(LLVMContextRef C); + +/** + * Obtain an 8-bit E5M2 floating point type from a context. + */ +LLVMTypeRef LLVMFloatE5M2TypeInContext(LLVMContextRef C); + /** * Obtain a 16-bit floating point type from a context. */ @@ -1232,6 +1244,8 @@ * * These map to the functions in this group of the same name. 
*/ +LLVMTypeRef LLVMFloatE4M3Type(void); +LLVMTypeRef LLVMFloatE5M2Type(void); LLVMTypeRef LLVMHalfType(void); LLVMTypeRef LLVMBFloatType(void); LLVMTypeRef LLVMFloatType(void); diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -175,6 +175,9 @@ TYPE_CODE_X86_AMX = 24, // X86 AMX TYPE_CODE_OPAQUE_POINTER = 25, // OPAQUE_POINTER: [addrspace] + + TYPE_CODE_F8E5M2 = 26, // F8E5M2 + TYPE_CODE_F8E4M3 = 27, // F8E4M3 }; enum OperandBundleTagCode { diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1816,6 +1816,10 @@ static const fltSemantics &EVTToAPFloatSemantics(EVT VT) { switch (VT.getScalarType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f8e4m3: + return APFloat::Float8E4M3FN(); + case MVT::f8e5m2: + return APFloat::Float8E5M2(); case MVT::f16: return APFloat::IEEEhalf(); case MVT::bf16: return APFloat::BFloat(); case MVT::f32: return APFloat::IEEEsingle(); diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -28,214 +28,246 @@ def i64 : ValueType<64, 8>; // 64-bit integer value def i128 : ValueType<128, 9>; // 128-bit integer value -def bf16 : ValueType<16, 10>; // 16-bit brain floating point value -def f16 : ValueType<16, 11>; // 16-bit floating point value -def f32 : ValueType<32, 12>; // 32-bit floating point value -def f64 : ValueType<64, 13>; // 64-bit floating point value -def f80 : ValueType<80, 14>; // 80-bit floating point value -def f128 : ValueType<128, 15>; // 128-bit floating point value -def ppcf128 : ValueType<128, 16>; // PPC 128-bit floating point value - -def v1i1 : ValueType<1, 17>; // 1 
x i1 vector value -def v2i1 : ValueType<2, 18>; // 2 x i1 vector value -def v4i1 : ValueType<4, 19>; // 4 x i1 vector value -def v8i1 : ValueType<8, 20>; // 8 x i1 vector value -def v16i1 : ValueType<16, 21>; // 16 x i1 vector value -def v32i1 : ValueType<32, 22>; // 32 x i1 vector value -def v64i1 : ValueType<64, 23>; // 64 x i1 vector value -def v128i1 : ValueType<128, 24>; // 128 x i1 vector value -def v256i1 : ValueType<256, 25>; // 256 x i1 vector value -def v512i1 : ValueType<512, 26>; // 512 x i1 vector value -def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value -def v2048i1 : ValueType<2048, 28>; // 2048 x i1 vector value - -def v128i2 : ValueType<256, 29>; // 128 x i2 vector value -def v256i2 : ValueType<512, 30>; // 256 x i2 vector value - -def v64i4 : ValueType<256, 31>; // 64 x i4 vector value -def v128i4 : ValueType<512, 32>; // 128 x i4 vector value - -def v1i8 : ValueType<8, 33>; // 1 x i8 vector value -def v2i8 : ValueType<16, 34>; // 2 x i8 vector value -def v4i8 : ValueType<32, 35>; // 4 x i8 vector value -def v8i8 : ValueType<64, 36>; // 8 x i8 vector value -def v16i8 : ValueType<128, 37>; // 16 x i8 vector value -def v32i8 : ValueType<256, 38>; // 32 x i8 vector value -def v64i8 : ValueType<512, 39>; // 64 x i8 vector value -def v128i8 : ValueType<1024, 40>; // 128 x i8 vector value -def v256i8 : ValueType<2048, 41>; // 256 x i8 vector value -def v512i8 : ValueType<4096, 42>; // 512 x i8 vector value -def v1024i8 : ValueType<8192, 43>; // 1024 x i8 vector value - -def v1i16 : ValueType<16, 44>; // 1 x i16 vector value -def v2i16 : ValueType<32, 45>; // 2 x i16 vector value -def v3i16 : ValueType<48, 46>; // 3 x i16 vector value -def v4i16 : ValueType<64, 47>; // 4 x i16 vector value -def v8i16 : ValueType<128, 48>; // 8 x i16 vector value -def v16i16 : ValueType<256, 49>; // 16 x i16 vector value -def v32i16 : ValueType<512, 50>; // 32 x i16 vector value -def v64i16 : ValueType<1024, 51>; // 64 x i16 vector value -def v128i16 : 
ValueType<2048, 52>; // 128 x i16 vector value -def v256i16 : ValueType<4096, 53>; // 256 x i16 vector value -def v512i16 : ValueType<8192, 54>; // 512 x i16 vector value - -def v1i32 : ValueType<32, 55>; // 1 x i32 vector value -def v2i32 : ValueType<64, 56>; // 2 x i32 vector value -def v3i32 : ValueType<96, 57>; // 3 x i32 vector value -def v4i32 : ValueType<128, 58>; // 4 x i32 vector value -def v5i32 : ValueType<160, 59>; // 5 x i32 vector value -def v6i32 : ValueType<192, 60>; // 6 x f32 vector value -def v7i32 : ValueType<224, 61>; // 7 x f32 vector value -def v8i32 : ValueType<256, 62>; // 8 x i32 vector value -def v9i32 : ValueType<288, 63>; // 9 x i32 vector value -def v10i32 : ValueType<320, 64>; // 10 x i32 vector value -def v11i32 : ValueType<352, 65>; // 11 x i32 vector value -def v12i32 : ValueType<384, 66>; // 12 x i32 vector value -def v16i32 : ValueType<512, 67>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 68>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 69>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 70>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 71>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 72>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 73>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 74>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 75>; // 1 x i64 vector value -def v2i64 : ValueType<128, 76>; // 2 x i64 vector value -def v3i64 : ValueType<192, 77>; // 3 x i64 vector value -def v4i64 : ValueType<256, 78>; // 4 x i64 vector value -def v8i64 : ValueType<512, 79>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 80>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 81>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 82>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 83>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 84>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 85>; // 1 x i128 
vector value - -def v1f16 : ValueType<16, 86>; // 1 x f16 vector value -def v2f16 : ValueType<32, 87>; // 2 x f16 vector value -def v3f16 : ValueType<48, 88>; // 3 x f16 vector value -def v4f16 : ValueType<64, 89>; // 4 x f16 vector value -def v8f16 : ValueType<128, 90>; // 8 x f16 vector value -def v16f16 : ValueType<256, 91>; // 16 x f16 vector value -def v32f16 : ValueType<512, 92>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 93>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 94>; // 128 x f16 vector value -def v256f16 : ValueType<4096, 95>; // 256 x f16 vector value -def v512f16 : ValueType<8192, 96>; // 512 x f16 vector value - -def v2bf16 : ValueType<32, 97>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 98>; // 3 x bf16 vector value -def v4bf16 : ValueType<64, 99>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 100>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 101>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 102>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 103>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 104>; // 128 x bf16 vector value - -def v1f32 : ValueType<32, 105>; // 1 x f32 vector value -def v2f32 : ValueType<64, 106>; // 2 x f32 vector value -def v3f32 : ValueType<96, 107>; // 3 x f32 vector value -def v4f32 : ValueType<128, 108>; // 4 x f32 vector value -def v5f32 : ValueType<160, 109>; // 5 x f32 vector value -def v6f32 : ValueType<192, 110>; // 6 x f32 vector value -def v7f32 : ValueType<224, 111>; // 7 x f32 vector value -def v8f32 : ValueType<256, 112>; // 8 x f32 vector value -def v9f32 : ValueType<288, 113>; // 9 x f32 vector value -def v10f32 : ValueType<320, 114>; // 10 x f32 vector value -def v11f32 : ValueType<352, 115>; // 11 x f32 vector value -def v12f32 : ValueType<384, 116>; // 12 x f32 vector value -def v16f32 : ValueType<512, 117>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 118>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 
119>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 120>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 121>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 122>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 123>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 124>; // 2048 x f32 vector value - -def v1f64 : ValueType<64, 125>; // 1 x f64 vector value -def v2f64 : ValueType<128, 126>; // 2 x f64 vector value -def v3f64 : ValueType<192, 127>; // 3 x f64 vector value -def v4f64 : ValueType<256, 128>; // 4 x f64 vector value -def v8f64 : ValueType<512, 129>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 130>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 131>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 132>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 133>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 134>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 135>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 136>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 137>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 138>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 139>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 140>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 141>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 142>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 143>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 144>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 145>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 146>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 147>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 148>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 149>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 150>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 151>; // n x 4 x i16 
vector value -def nxv8i16 : ValueType<128, 152>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 153>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 154>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 155>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 156>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 157>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 158>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 159>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 160>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 161>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 162>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 163>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 164>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 165>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 166>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 167>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 168>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 169>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 170>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 171>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 172>; // n x 32 x f16 vector value - -def nxv1bf16 : ValueType<16, 173>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 174>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 175>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 176>; // n x 8 x bf16 vector value -def nxv16bf16 : ValueType<256, 177>; // n x 16 x bf16 vector value -def nxv32bf16 : ValueType<512, 178>; // n x 32 x bf16 vector value - -def nxv1f32 : ValueType<32, 179>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 180>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 181>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 
182>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 183>; // n x 16 x f32 vector value - -def nxv1f64 : ValueType<64, 184>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 185>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 186>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 187>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 188>; // X86 MMX value -def FlagVT : ValueType<0, 189>; // Pre-RA sched glue -def isVoid : ValueType<0, 190>; // Produces no value -def untyped : ValueType<8, 191>; // Produces an untyped value -def funcref : ValueType<0, 192>; // WebAssembly's funcref type -def externref : ValueType<0, 193>; // WebAssembly's externref type -def x86amx : ValueType<8192, 194>; // X86 AMX value -def i64x8 : ValueType<512, 195>; // 8 Consecutive GPRs (AArch64) +def f8e4m3 : ValueType<8, 10>; // 8-bit floating point value (E4M3) +def f8e5m2 : ValueType<8, 11>; // 8-bit floating point value (E5M2) +def bf16 : ValueType<16, 12>; // 16-bit brain floating point value +def f16 : ValueType<16, 13>; // 16-bit floating point value +def f32 : ValueType<32, 14>; // 32-bit floating point value +def f64 : ValueType<64, 15>; // 64-bit floating point value +def f80 : ValueType<80, 16>; // 80-bit floating point value +def f128 : ValueType<128, 17>; // 128-bit floating point value +def ppcf128 : ValueType<128, 18>; // PPC 128-bit floating point value + +def v1i1 : ValueType<1, 19>; // 1 x i1 vector value +def v2i1 : ValueType<2, 20>; // 2 x i1 vector value +def v4i1 : ValueType<4, 21>; // 4 x i1 vector value +def v8i1 : ValueType<8, 22>; // 8 x i1 vector value +def v16i1 : ValueType<16, 23>; // 16 x i1 vector value +def v32i1 : ValueType<32, 24>; // 32 x i1 vector value +def v64i1 : ValueType<64, 25>; // 64 x i1 vector value +def v128i1 : ValueType<128, 26>; // 128 x i1 vector value +def v256i1 : ValueType<256, 27>; // 256 x i1 vector value +def v512i1 : ValueType<512, 28>; // 512 x i1 vector value +def v1024i1 : 
ValueType<1024, 29>; // 1024 x i1 vector value +def v2048i1 : ValueType<2048, 30>; // 2048 x i1 vector value + +def v128i2 : ValueType<256, 31>; // 128 x i2 vector value +def v256i2 : ValueType<512, 32>; // 256 x i2 vector value + +def v64i4 : ValueType<256, 33>; // 64 x i4 vector value +def v128i4 : ValueType<512, 34>; // 128 x i4 vector value + +def v1i8 : ValueType<8, 35>; // 1 x i8 vector value +def v2i8 : ValueType<16, 36>; // 2 x i8 vector value +def v4i8 : ValueType<32, 37>; // 4 x i8 vector value +def v8i8 : ValueType<64, 38>; // 8 x i8 vector value +def v16i8 : ValueType<128, 39>; // 16 x i8 vector value +def v32i8 : ValueType<256, 40>; // 32 x i8 vector value +def v64i8 : ValueType<512, 41>; // 64 x i8 vector value +def v128i8 : ValueType<1024, 42>; // 128 x i8 vector value +def v256i8 : ValueType<2048, 43>; // 256 x i8 vector value +def v512i8 : ValueType<4096, 44>; // 512 x i8 vector value +def v1024i8 : ValueType<8192, 45>; // 1024 x i8 vector value + +def v1i16 : ValueType<16, 46>; // 1 x i16 vector value +def v2i16 : ValueType<32, 47>; // 2 x i16 vector value +def v3i16 : ValueType<48, 48>; // 3 x i16 vector value +def v4i16 : ValueType<64, 49>; // 4 x i16 vector value +def v8i16 : ValueType<128, 50>; // 8 x i16 vector value +def v16i16 : ValueType<256, 51>; // 16 x i16 vector value +def v32i16 : ValueType<512, 52>; // 32 x i16 vector value +def v64i16 : ValueType<1024, 53>; // 64 x i16 vector value +def v128i16 : ValueType<2048, 54>; // 128 x i16 vector value +def v256i16 : ValueType<4096, 55>; // 256 x i16 vector value +def v512i16 : ValueType<8192, 56>; // 512 x i16 vector value + +def v1i32 : ValueType<32, 57>; // 1 x i32 vector value +def v2i32 : ValueType<64, 58>; // 2 x i32 vector value +def v3i32 : ValueType<96, 59>; // 3 x i32 vector value +def v4i32 : ValueType<128, 60>; // 4 x i32 vector value +def v5i32 : ValueType<160, 61>; // 5 x i32 vector value +def v6i32 : ValueType<192, 62>; // 6 x f32 vector value +def v7i32 : ValueType<224, 63>; 
// 7 x f32 vector value +def v8i32 : ValueType<256, 64>; // 8 x i32 vector value +def v9i32 : ValueType<288, 65>; // 9 x i32 vector value +def v10i32 : ValueType<320, 66>; // 10 x i32 vector value +def v11i32 : ValueType<352, 67>; // 11 x i32 vector value +def v12i32 : ValueType<384, 68>; // 12 x i32 vector value +def v16i32 : ValueType<512, 69>; // 16 x i32 vector value +def v32i32 : ValueType<1024, 70>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 71>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 72>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 73>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 74>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 75>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 76>; // 2048 x i32 vector value + +def v1i64 : ValueType<64, 77>; // 1 x i64 vector value +def v2i64 : ValueType<128, 78>; // 2 x i64 vector value +def v3i64 : ValueType<192, 79>; // 3 x i64 vector value +def v4i64 : ValueType<256, 80>; // 4 x i64 vector value +def v8i64 : ValueType<512, 81>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 82>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 83>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 84>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 85>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 86>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 87>; // 1 x i128 vector value + +def v1f8e4m3 : ValueType<8, 88>; // 1 x fp8 vector value +def v2f8e4m3 : ValueType<16, 89>; // 2 x fp8 vector value +def v3f8e4m3 : ValueType<24, 90>; // 3 x fp8 vector value +def v4f8e4m3 : ValueType<32, 91>; // 4 x fp8 vector value +def v8f8e4m3 : ValueType<64, 92>; // 8 x fp8 vector value +def v16f8e4m3 : ValueType<128, 93>; // 16 x fp8 vector value +def v32f8e4m3 : ValueType<256, 94>; // 32 x fp8 vector value +def v64f8e4m3 : ValueType<512, 95>; // 64 x fp8 vector value +def v128f8e4m3 : ValueType<1024, 96>; // 128 x fp8 
vector value + +def v1f8e5m2 : ValueType<8, 97>; // 1 x fp8 vector value +def v2f8e5m2 : ValueType<16, 98>; // 2 x fp8 vector value +def v3f8e5m2 : ValueType<24, 99>; // 3 x fp8 vector value +def v4f8e5m2 : ValueType<32, 100>; // 4 x fp8 vector value +def v8f8e5m2 : ValueType<64, 101>; // 8 x fp8 vector value +def v16f8e5m2 : ValueType<128, 102>; // 16 x fp8 vector value +def v32f8e5m2 : ValueType<256, 103>; // 32 x fp8 vector value +def v64f8e5m2 : ValueType<512, 104>; // 64 x fp8 vector value +def v128f8e5m2 : ValueType<1024, 105>; // 128 x fp8 vector value + +def v1f16 : ValueType<16, 106>; // 1 x f16 vector value +def v2f16 : ValueType<32, 107>; // 2 x f16 vector value +def v3f16 : ValueType<48, 108>; // 3 x f16 vector value +def v4f16 : ValueType<64, 109>; // 4 x f16 vector value +def v8f16 : ValueType<128, 110>; // 8 x f16 vector value +def v16f16 : ValueType<256, 111>; // 16 x f16 vector value +def v32f16 : ValueType<512, 112>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 113>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 114>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 115>; // 256 x f16 vector value +def v512f16 : ValueType<8192, 116>; // 512 x f16 vector value + +def v2bf16 : ValueType<32, 117>; // 2 x bf16 vector value +def v3bf16 : ValueType<48, 118>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 119>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 120>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 121>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 122>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 123>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 124>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 125>; // 1 x f32 vector value +def v2f32 : ValueType<64, 126>; // 2 x f32 vector value +def v3f32 : ValueType<96, 127>; // 3 x f32 vector value +def v4f32 : ValueType<128, 128>; // 4 x f32 vector value +def v5f32 : ValueType<160, 129>; // 5 x f32 vector 
value +def v6f32 : ValueType<192, 130>; // 6 x f32 vector value +def v7f32 : ValueType<224, 131>; // 7 x f32 vector value +def v8f32 : ValueType<256, 132>; // 8 x f32 vector value +def v9f32 : ValueType<288, 133>; // 9 x f32 vector value +def v10f32 : ValueType<320, 134>; // 10 x f32 vector value +def v11f32 : ValueType<352, 135>; // 11 x f32 vector value +def v12f32 : ValueType<384, 136>; // 12 x f32 vector value +def v16f32 : ValueType<512, 137>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 138>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 139>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 140>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 141>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 142>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 143>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 144>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 145>; // 1 x f64 vector value +def v2f64 : ValueType<128, 146>; // 2 x f64 vector value +def v3f64 : ValueType<192, 147>; // 3 x f64 vector value +def v4f64 : ValueType<256, 148>; // 4 x f64 vector value +def v8f64 : ValueType<512, 149>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 150>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 151>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 152>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 153>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 154>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 155>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 156>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 157>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 158>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 159>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 160>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 161>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 162>; // n x 1 x 
i8 vector value +def nxv2i8 : ValueType<16, 163>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 164>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 165>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 166>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 167>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 168>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 169>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 170>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 171>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 172>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 173>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 174>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 175>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 176>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 177>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 178>; // n x 8 x i32 vector value +def nxv16i32 : ValueType<512, 179>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 180>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 181>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 182>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 183>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 184>; // n x 8 x i64 vector value +def nxv16i64 : ValueType<1024, 185>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 186>; // n x 32 x i64 vector value + +def nxv1f8e4m3 : ValueType<8, 187>; // n x 1 x f8e4m3 vector value +def nxv2f8e4m3 : ValueType<16, 188>; // n x 2 x f8e4m3 vector value +def nxv4f8e4m3 : ValueType<32, 189>; // n x 4 x f8e4m3 vector value +def nxv8f8e4m3 : ValueType<64, 190>; // n x 8 x f8e4m3 vector value + +def nxv1f8e5m2 : ValueType<8, 191>; // n x 1 x f8e5m2 vector value +def nxv2f8e5m2 : ValueType<16, 192>; // n x 2 x f8e5m2 vector value +def nxv4f8e5m2 : 
ValueType<32, 193>; // n x 4 x f8e5m2 vector value +def nxv8f8e5m2 : ValueType<64, 194>; // n x 8 x f8e5m2 vector value + +def nxv1f16 : ValueType<16, 195>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 196>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 197>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 198>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 199>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 200>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 201>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 202>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 203>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 204>; // n x 8 x bf16 vector value +def nxv16bf16 : ValueType<256, 205>; // n x 16 x bf16 vector value +def nxv32bf16 : ValueType<512, 206>; // n x 32 x bf16 vector value + +def nxv1f32 : ValueType<32, 207>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 208>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 209>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 210>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 211>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 212>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 213>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 214>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 215>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 216>; // X86 MMX value +def FlagVT : ValueType<0, 217>; // Pre-RA sched glue +def isVoid : ValueType<0, 218>; // Produces no value +def untyped : ValueType<8, 219>; // Produces an untyped value +def funcref : ValueType<0, 220>; // WebAssembly's funcref type +def externref : ValueType<0, 221>; // WebAssembly's externref type +def x86amx : ValueType<8192, 222>; // X86 AMX value +def i64x8 : ValueType<512, 223>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // 
TokenTy def MetadataVT : ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -688,6 +688,9 @@ getStructLayout(cast<StructType>(Ty))->getSizeInBits()); case Type::IntegerTyID: return TypeSize::Fixed(Ty->getIntegerBitWidth()); + case Type::F8E5M2TyID: + case Type::F8E4M3TyID: + return TypeSize::Fixed(8); case Type::HalfTyID: case Type::BFloatTyID: return TypeSize::Fixed(16); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -526,6 +526,12 @@ return Type::getIntNTy(Context, N); } + /// Fetch the type representing an 8-bit E5M2 floating point value. + Type *getF8E5M2Ty() { return Type::getF8E5M2Ty(Context); } + + /// Fetch the type representing an 8-bit E4M3 floating point value. + Type *getF8E4M3Ty() { return Type::getF8E4M3Ty(Context); } + /// Fetch the type representing a 16-bit floating point value. Type *getHalfTy() { return Type::getHalfTy(Context); diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -55,6 +55,8 @@ // PrimitiveTypes HalfTyID = 0, ///< 16-bit floating point type BFloatTyID, ///< 16-bit floating point type (7-bit significand) + F8E5M2TyID, ///< 8-bit floating point type (5-bit exponent) + F8E4M3TyID, ///< 8-bit floating point type (4-bit exponent) FloatTyID, ///< 32-bit floating point type DoubleTyID, ///< 64-bit floating point type X86_FP80TyID, ///< 80-bit floating point type (X87) @@ -141,6 +143,12 @@ /// Return true if this is 'half', a 16-bit IEEE fp type. bool isHalfTy() const { return getTypeID() == HalfTyID; } + /// Return true if this is 'F8E5M2'. + bool isF8E5M2() const { return getTypeID() == F8E5M2TyID; } + + /// Return true if this is 'F8E4M3'. 
+ bool isF8E4M3() const { return getTypeID() == F8E4M3TyID; } + /// Return true if this is 'bfloat', a 16-bit bfloat type. bool isBFloatTy() const { return getTypeID() == BFloatTyID; } @@ -149,6 +157,11 @@ return getTypeID() == BFloatTyID || getTypeID() == HalfTyID; } + /// Return true if this is an 8-bit float type. + bool is8BitFPTy() const { + return getTypeID() == F8E5M2TyID || getTypeID() == F8E4M3TyID; + } + /// Return true if this is 'float', a 32-bit IEEE fp type. bool isFloatTy() const { return getTypeID() == FloatTyID; } @@ -174,6 +187,8 @@ case HalfTyID: case BFloatTyID: case FP128TyID: + case F8E5M2TyID: + case F8E4M3TyID: return true; default: return false; @@ -435,6 +450,8 @@ static Type *getVoidTy(LLVMContext &C); static Type *getLabelTy(LLVMContext &C); static Type *getHalfTy(LLVMContext &C); + static Type *getF8E5M2Ty(LLVMContext &C); + static Type *getF8E4M3Ty(LLVMContext &C); static Type *getBFloatTy(LLVMContext &C); static Type *getFloatTy(LLVMContext &C); static Type *getDoubleTy(LLVMContext &C); @@ -472,6 +489,8 @@ // Convenience methods for getting pointer types with one of the above builtin // types as pointee. 
// + static PointerType *getF8E5M2PtrTy(LLVMContext &C, unsigned AS = 0); + static PointerType *getF8E4M3PtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getBFloatPtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0); diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -52,223 +52,254 @@ FIRST_INTEGER_VALUETYPE = i1, LAST_INTEGER_VALUETYPE = i128, - bf16 = 10, // This is a 16 bit brain floating point value - f16 = 11, // This is a 16 bit floating point value - f32 = 12, // This is a 32 bit floating point value - f64 = 13, // This is a 64 bit floating point value - f80 = 14, // This is a 80 bit floating point value - f128 = 15, // This is a 128 bit floating point value - ppcf128 = 16, // This is a PPC 128-bit floating point value - - FIRST_FP_VALUETYPE = bf16, + f8e4m3 = 10, // This is an 8 bit floating point value (e4m3) + f8e5m2 = 11, // This is an 8 bit floating point value (e5m2) + bf16 = 12, // This is a 16 bit brain floating point value + f16 = 13, // This is a 16 bit floating point value + f32 = 14, // This is a 32 bit floating point value + f64 = 15, // This is a 64 bit floating point value + f80 = 16, // This is a 80 bit floating point value + f128 = 17, // This is a 128 bit floating point value + ppcf128 = 18, // This is a PPC 128-bit floating point value + + FIRST_FP_VALUETYPE = f8e4m3, LAST_FP_VALUETYPE = ppcf128, - v1i1 = 17, // 1 x i1 - v2i1 = 18, // 2 x i1 - v4i1 = 19, // 4 x i1 - v8i1 = 20, // 8 x i1 - v16i1 = 21, // 16 x i1 - v32i1 = 22, // 32 x i1 - v64i1 = 23, // 64 x i1 - v128i1 = 24, // 128 x i1 - v256i1 = 25, // 256 x i1 - v512i1 = 26, // 512 x i1 - v1024i1 = 27, // 1024 x i1 - v2048i1 = 28, // 2048 x i1 - - v128i2 = 29, // 128 x i2 - v256i2 = 30, // 256 x i2 - 
- v64i4 = 31, // 64 x i4 - v128i4 = 32, // 128 x i4 - - v1i8 = 33, // 1 x i8 - v2i8 = 34, // 2 x i8 - v4i8 = 35, // 4 x i8 - v8i8 = 36, // 8 x i8 - v16i8 = 37, // 16 x i8 - v32i8 = 38, // 32 x i8 - v64i8 = 39, // 64 x i8 - v128i8 = 40, // 128 x i8 - v256i8 = 41, // 256 x i8 - v512i8 = 42, // 512 x i8 - v1024i8 = 43, // 1024 x i8 - - v1i16 = 44, // 1 x i16 - v2i16 = 45, // 2 x i16 - v3i16 = 46, // 3 x i16 - v4i16 = 47, // 4 x i16 - v8i16 = 48, // 8 x i16 - v16i16 = 49, // 16 x i16 - v32i16 = 50, // 32 x i16 - v64i16 = 51, // 64 x i16 - v128i16 = 52, // 128 x i16 - v256i16 = 53, // 256 x i16 - v512i16 = 54, // 512 x i16 - - v1i32 = 55, // 1 x i32 - v2i32 = 56, // 2 x i32 - v3i32 = 57, // 3 x i32 - v4i32 = 58, // 4 x i32 - v5i32 = 59, // 5 x i32 - v6i32 = 60, // 6 x i32 - v7i32 = 61, // 7 x i32 - v8i32 = 62, // 8 x i32 - v9i32 = 63, // 9 x i32 - v10i32 = 64, // 10 x i32 - v11i32 = 65, // 11 x i32 - v12i32 = 66, // 12 x i32 - v16i32 = 67, // 16 x i32 - v32i32 = 68, // 32 x i32 - v64i32 = 69, // 64 x i32 - v128i32 = 70, // 128 x i32 - v256i32 = 71, // 256 x i32 - v512i32 = 72, // 512 x i32 - v1024i32 = 73, // 1024 x i32 - v2048i32 = 74, // 2048 x i32 - - v1i64 = 75, // 1 x i64 - v2i64 = 76, // 2 x i64 - v3i64 = 77, // 3 x i64 - v4i64 = 78, // 4 x i64 - v8i64 = 79, // 8 x i64 - v16i64 = 80, // 16 x i64 - v32i64 = 81, // 32 x i64 - v64i64 = 82, // 64 x i64 - v128i64 = 83, // 128 x i64 - v256i64 = 84, // 256 x i64 - - v1i128 = 85, // 1 x i128 + v1i1 = 19, // 1 x i1 + v2i1 = 20, // 2 x i1 + v4i1 = 21, // 4 x i1 + v8i1 = 22, // 8 x i1 + v16i1 = 23, // 16 x i1 + v32i1 = 24, // 32 x i1 + v64i1 = 25, // 64 x i1 + v128i1 = 26, // 128 x i1 + v256i1 = 27, // 256 x i1 + v512i1 = 28, // 512 x i1 + v1024i1 = 29, // 1024 x i1 + v2048i1 = 30, // 2048 x i1 + + v128i2 = 31, // 128 x i2 + v256i2 = 32, // 256 x i2 + + v64i4 = 33, // 64 x i4 + v128i4 = 34, // 128 x i4 + v1i8 = 35, // 1 x i8 + v2i8 = 36, // 2 x i8 + v4i8 = 37, // 4 x i8 + v8i8 = 38, // 8 x i8 + v16i8 = 39, // 16 x i8 +
v32i8 = 40, // 32 x i8 + v64i8 = 41, // 64 x i8 + v128i8 = 42, // 128 x i8 + v256i8 = 43, // 256 x i8 + v512i8 = 44, // 512 x i8 + v1024i8 = 45, // 1024 x i8 + + v1i16 = 46, // 1 x i16 + v2i16 = 47, // 2 x i16 + v3i16 = 48, // 3 x i16 + v4i16 = 49, // 4 x i16 + v8i16 = 50, // 8 x i16 + v16i16 = 51, // 16 x i16 + v32i16 = 52, // 32 x i16 + v64i16 = 53, // 64 x i16 + v128i16 = 54, // 128 x i16 + v256i16 = 55, // 256 x i16 + v512i16 = 56, // 512 x i16 + + v1i32 = 57, // 1 x i32 + v2i32 = 58, // 2 x i32 + v3i32 = 59, // 3 x i32 + v4i32 = 60, // 4 x i32 + v5i32 = 61, // 5 x i32 + v6i32 = 62, // 6 x i32 + v7i32 = 63, // 7 x i32 + v8i32 = 64, // 8 x i32 + v9i32 = 65, // 9 x i32 + v10i32 = 66, // 10 x i32 + v11i32 = 67, // 11 x i32 + v12i32 = 68, // 12 x i32 + v16i32 = 69, // 16 x i32 + v32i32 = 70, // 32 x i32 + v64i32 = 71, // 64 x i32 + v128i32 = 72, // 128 x i32 + v256i32 = 73, // 256 x i32 + v512i32 = 74, // 512 x i32 + v1024i32 = 75, // 1024 x i32 + v2048i32 = 76, // 2048 x i32 + + v1i64 = 77, // 1 x i64 + v2i64 = 78, // 2 x i64 + v3i64 = 79, // 3 x i64 + v4i64 = 80, // 4 x i64 + v8i64 = 81, // 8 x i64 + v16i64 = 82, // 16 x i64 + v32i64 = 83, // 32 x i64 + v64i64 = 84, // 64 x i64 + v128i64 = 85, // 128 x i64 + v256i64 = 86, // 256 x i64 + + v1i128 = 87, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v1f16 = 86, // 1 x f16 - v2f16 = 87, // 2 x f16 - v3f16 = 88, // 3 x f16 - v4f16 = 89, // 4 x f16 - v8f16 = 90, // 8 x f16 - v16f16 = 91, // 16 x f16 - v32f16 = 92, // 32 x f16 - v64f16 = 93, // 64 x f16 - v128f16 = 94, // 128 x f16 - v256f16 = 95, // 256 x f16 - v512f16 = 96, // 512 x f16 - - v2bf16 = 97, // 2 x bf16 - v3bf16 = 98, // 3 x bf16 - v4bf16 = 99, // 4 x bf16 - v8bf16 = 100, // 8 x bf16 - v16bf16 = 101, // 16 x bf16 - v32bf16 = 102, // 32 x bf16 - v64bf16 = 103, // 64 x bf16 - v128bf16 = 104, // 128 x bf16 - - v1f32 = 105, // 1 x f32 - v2f32 = 106, // 2 x f32 - v3f32 = 107, // 3 x f32 - v4f32 = 
108, // 4 x f32 - v5f32 = 109, // 5 x f32 - v6f32 = 110, // 6 x f32 - v7f32 = 111, // 7 x f32 - v8f32 = 112, // 8 x f32 - v9f32 = 113, // 9 x f32 - v10f32 = 114, // 10 x f32 - v11f32 = 115, // 11 x f32 - v12f32 = 116, // 12 x f32 - v16f32 = 117, // 16 x f32 - - v32f32 = 118, // 32 x f32 - v64f32 = 119, // 64 x f32 - v128f32 = 120, // 128 x f32 - v256f32 = 121, // 256 x f32 - v512f32 = 122, // 512 x f32 - v1024f32 = 123, // 1024 x f32 - v2048f32 = 124, // 2048 x f32 - - v1f64 = 125, // 1 x f64 - v2f64 = 126, // 2 x f64 - v3f64 = 127, // 3 x f64 - v4f64 = 128, // 4 x f64 - v8f64 = 129, // 8 x f64 - v16f64 = 130, // 16 x f64 - v32f64 = 131, // 32 x f64 - v64f64 = 132, // 64 x f64 - v128f64 = 133, // 128 x f64 - v256f64 = 134, // 256 x f64 - - FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16, + v1f8e4m3 = 88, // 1 x f8e4m3 + v2f8e4m3 = 89, // 2 x f8e4m3 + v3f8e4m3 = 90, // 3 x f8e4m3 + v4f8e4m3 = 91, // 4 x f8e4m3 + v8f8e4m3 = 92, // 8 x f8e4m3 + v16f8e4m3 = 93, // 16 x f8e4m3 + v32f8e4m3 = 94, // 32 x f8e4m3 + v64f8e4m3 = 95, // 64 x f8e4m3 + v128f8e4m3 = 96, // 128 x f8e4m3 + + v1f8e5m2 = 97, // 1 x f8e5m2 + v2f8e5m2 = 98, // 2 x f8e5m2 + v3f8e5m2 = 99, // 3 x f8e5m2 + v4f8e5m2 = 100, // 4 x f8e5m2 + v8f8e5m2 = 101, // 8 x f8e5m2 + v16f8e5m2 = 102, // 16 x f8e5m2 + v32f8e5m2 = 103, // 32 x f8e5m2 + v64f8e5m2 = 104, // 64 x f8e5m2 + v128f8e5m2 = 105, // 128 x f8e5m2 + + v1f16 = 106, // 1 x f16 + v2f16 = 107, // 2 x f16 + v3f16 = 108, // 3 x f16 + v4f16 = 109, // 4 x f16 + v8f16 = 110, // 8 x f16 + v16f16 = 111, // 16 x f16 + v32f16 = 112, // 32 x f16 + v64f16 = 113, // 64 x f16 + v128f16 = 114, // 128 x f16 + v256f16 = 115, // 256 x f16 + v512f16 = 116, // 512 x f16 + + v2bf16 = 117, // 2 x bf16 + v3bf16 = 118, // 3 x bf16 + v4bf16 = 119, // 4 x bf16 + v8bf16 = 120, // 8 x bf16 + v16bf16 = 121, // 16 x bf16 + v32bf16 = 122, // 32 x bf16 + v64bf16 = 123, // 64 x bf16 + v128bf16 = 124, // 128 x bf16 + + v1f32 = 125, // 1 x f32 + v2f32 = 126, // 2 x f32 + v3f32 = 127, // 3 x 
f32 + v4f32 = 128, // 4 x f32 + v5f32 = 129, // 5 x f32 + v6f32 = 130, // 6 x f32 + v7f32 = 131, // 7 x f32 + v8f32 = 132, // 8 x f32 + v9f32 = 133, // 9 x f32 + v10f32 = 134, // 10 x f32 + v11f32 = 135, // 11 x f32 + v12f32 = 136, // 12 x f32 + v16f32 = 137, // 16 x f32 + + v32f32 = 138, // 32 x f32 + v64f32 = 139, // 64 x f32 + v128f32 = 140, // 128 x f32 + v256f32 = 141, // 256 x f32 + v512f32 = 142, // 512 x f32 + v1024f32 = 143, // 1024 x f32 + v2048f32 = 144, // 2048 x f32 + + v1f64 = 145, // 1 x f64 + v2f64 = 146, // 2 x f64 + v3f64 = 147, // 3 x f64 + v4f64 = 148, // 4 x f64 + v8f64 = 149, // 8 x f64 + v16f64 = 150, // 16 x f64 + v32f64 = 151, // 32 x f64 + v64f64 = 152, // 64 x f64 + v128f64 = 153, // 128 x f64 + v256f64 = 154, // 256 x f64 + + FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f8e4m3, LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, - nxv1i1 = 135, // n x 1 x i1 - nxv2i1 = 136, // n x 2 x i1 - nxv4i1 = 137, // n x 4 x i1 - nxv8i1 = 138, // n x 8 x i1 - nxv16i1 = 139, // n x 16 x i1 - nxv32i1 = 140, // n x 32 x i1 - nxv64i1 = 141, // n x 64 x i1 - - nxv1i8 = 142, // n x 1 x i8 - nxv2i8 = 143, // n x 2 x i8 - nxv4i8 = 144, // n x 4 x i8 - nxv8i8 = 145, // n x 8 x i8 - nxv16i8 = 146, // n x 16 x i8 - nxv32i8 = 147, // n x 32 x i8 - nxv64i8 = 148, // n x 64 x i8 - - nxv1i16 = 149, // n x 1 x i16 - nxv2i16 = 150, // n x 2 x i16 - nxv4i16 = 151, // n x 4 x i16 - nxv8i16 = 152, // n x 8 x i16 - nxv16i16 = 153, // n x 16 x i16 - nxv32i16 = 154, // n x 32 x i16 - - nxv1i32 = 155, // n x 1 x i32 - nxv2i32 = 156, // n x 2 x i32 - nxv4i32 = 157, // n x 4 x i32 - nxv8i32 = 158, // n x 8 x i32 - nxv16i32 = 159, // n x 16 x i32 - nxv32i32 = 160, // n x 32 x i32 - - nxv1i64 = 161, // n x 1 x i64 - nxv2i64 = 162, // n x 2 x i64 - nxv4i64 = 163, // n x 4 x i64 - nxv8i64 = 164, // n x 8 x i64 - nxv16i64 = 165, // n x 16 x i64 - nxv32i64 = 166, // n x 32 x i64 + nxv1i1 = 155, // n x 1 x i1 + 
nxv2i1 = 156, // n x 2 x i1 + nxv4i1 = 157, // n x 4 x i1 + nxv8i1 = 158, // n x 8 x i1 + nxv16i1 = 159, // n x 16 x i1 + nxv32i1 = 160, // n x 32 x i1 + nxv64i1 = 161, // n x 64 x i1 + + nxv1i8 = 162, // n x 1 x i8 + nxv2i8 = 163, // n x 2 x i8 + nxv4i8 = 164, // n x 4 x i8 + nxv8i8 = 165, // n x 8 x i8 + nxv16i8 = 166, // n x 16 x i8 + nxv32i8 = 167, // n x 32 x i8 + nxv64i8 = 168, // n x 64 x i8 + + nxv1i16 = 169, // n x 1 x i16 + nxv2i16 = 170, // n x 2 x i16 + nxv4i16 = 171, // n x 4 x i16 + nxv8i16 = 172, // n x 8 x i16 + nxv16i16 = 173, // n x 16 x i16 + nxv32i16 = 174, // n x 32 x i16 + + nxv1i32 = 175, // n x 1 x i32 + nxv2i32 = 176, // n x 2 x i32 + nxv4i32 = 177, // n x 4 x i32 + nxv8i32 = 178, // n x 8 x i32 + nxv16i32 = 179, // n x 16 x i32 + nxv32i32 = 180, // n x 32 x i32 + + nxv1i64 = 181, // n x 1 x i64 + nxv2i64 = 182, // n x 2 x i64 + nxv4i64 = 183, // n x 4 x i64 + nxv8i64 = 184, // n x 8 x i64 + nxv16i64 = 185, // n x 16 x i64 + nxv32i64 = 186, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 167, // n x 1 x f16 - nxv2f16 = 168, // n x 2 x f16 - nxv4f16 = 169, // n x 4 x f16 - nxv8f16 = 170, // n x 8 x f16 - nxv16f16 = 171, // n x 16 x f16 - nxv32f16 = 172, // n x 32 x f16 - - nxv1bf16 = 173, // n x 1 x bf16 - nxv2bf16 = 174, // n x 2 x bf16 - nxv4bf16 = 175, // n x 4 x bf16 - nxv8bf16 = 176, // n x 8 x bf16 - nxv16bf16 = 177, // n x 16 x bf16 - nxv32bf16 = 178, // n x 32 x bf16 - - nxv1f32 = 179, // n x 1 x f32 - nxv2f32 = 180, // n x 2 x f32 - nxv4f32 = 181, // n x 4 x f32 - nxv8f32 = 182, // n x 8 x f32 - nxv16f32 = 183, // n x 16 x f32 - - nxv1f64 = 184, // n x 1 x f64 - nxv2f64 = 185, // n x 2 x f64 - nxv4f64 = 186, // n x 4 x f64 - nxv8f64 = 187, // n x 8 x f64 - - FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, + nxv1f8e4m3 = 187, // n x 1 x f8e4m3 + nxv2f8e4m3 = 188, // n x 2 x f8e4m3 + nxv4f8e4m3 = 189, // n x 4 x f8e4m3 + nxv8f8e4m3 = 190, // n x 8 x f8e4m3 
+ + nxv1f8e5m2 = 191, // n x 1 x f8e5m2 + nxv2f8e5m2 = 192, // n x 2 x f8e5m2 + nxv4f8e5m2 = 193, // n x 4 x f8e5m2 + nxv8f8e5m2 = 194, // n x 8 x f8e5m2 + + nxv1f16 = 195, // n x 1 x f16 + nxv2f16 = 196, // n x 2 x f16 + nxv4f16 = 197, // n x 4 x f16 + nxv8f16 = 198, // n x 8 x f16 + nxv16f16 = 199, // n x 16 x f16 + nxv32f16 = 200, // n x 32 x f16 + + nxv1bf16 = 201, // n x 1 x bf16 + nxv2bf16 = 202, // n x 2 x bf16 + nxv4bf16 = 203, // n x 4 x bf16 + nxv8bf16 = 204, // n x 8 x bf16 + nxv16bf16 = 205, // n x 16 x bf16 + nxv32bf16 = 206, // n x 32 x bf16 + + nxv1f32 = 207, // n x 1 x f32 + nxv2f32 = 208, // n x 2 x f32 + nxv4f32 = 209, // n x 4 x f32 + nxv8f32 = 210, // n x 8 x f32 + nxv16f32 = 211, // n x 16 x f32 + + nxv1f64 = 212, // n x 1 x f64 + nxv2f64 = 213, // n x 2 x f64 + nxv4f64 = 214, // n x 4 x f64 + nxv8f64 = 215, // n x 8 x f64 + + FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f8e4m3, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1, @@ -277,20 +308,20 @@ FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 188, // This is an X86 MMX value + x86mmx = 216, // This is an X86 MMX value - Glue = 189, // This glues nodes together during pre-RA sched + Glue = 217, // This glues nodes together during pre-RA sched - isVoid = 190, // This has no value + isVoid = 218, // This has no value - Untyped = 191, // This value takes a register, but has + Untyped = 219, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. 
- funcref = 192, // WebAssembly's funcref type - externref = 193, // WebAssembly's externref type - x86amx = 194, // This is an X86 AMX value - i64x8 = 195, // 8 Consecutive GPRs (AArch64) + funcref = 220, // WebAssembly's funcref type + externref = 221, // WebAssembly's externref type + x86amx = 222, // This is an X86 AMX value + i64x8 = 223, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. LAST_VALUETYPE = i64x8, // This always remains at the end of the list. @@ -402,62 +433,69 @@ /// Return true if this is a 16-bit vector type. bool is16BitVector() const { - return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || - SimpleTy == MVT::v16i1 || SimpleTy == MVT::v1f16); + return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || + SimpleTy == MVT::v2f8e4m3 || SimpleTy == MVT::v2f8e5m2 || + SimpleTy == MVT::v16i1 || SimpleTy == MVT::v1f16); } /// Return true if this is a 32-bit vector type. bool is32BitVector() const { - return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 || - SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 || - SimpleTy == MVT::v2f16 || SimpleTy == MVT::v2bf16 || + return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 || + SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 || + SimpleTy == MVT::v4f8e4m3 || SimpleTy == MVT::v4f8e5m2 || + SimpleTy == MVT::v2f16 || SimpleTy == MVT::v2bf16 || SimpleTy == MVT::v1f32); } /// Return true if this is a 64-bit vector type. 
bool is64BitVector() const { - return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 || - SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 || - SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 || - SimpleTy == MVT::v4bf16 ||SimpleTy == MVT::v2f32 || + return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 || + SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 || + SimpleTy == MVT::v8f8e4m3 || SimpleTy == MVT::v8f8e5m2 || + SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 || + SimpleTy == MVT::v4bf16 ||SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64); } /// Return true if this is a 128-bit vector type. bool is128BitVector() const { - return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || - SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || - SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || - SimpleTy == MVT::v8f16 || SimpleTy == MVT::v8bf16 || - SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64); + return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || + SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || + SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || + SimpleTy == MVT::v16f8e4m3 || SimpleTy == MVT::v16f8e5m2 || + SimpleTy == MVT::v8f16 || SimpleTy == MVT::v8bf16 || + SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64); } /// Return true if this is a 256-bit vector type. 
bool is256BitVector() const { - return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 || - SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 || - SimpleTy == MVT::v256i1 || SimpleTy == MVT::v128i2 || + return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 || + SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || + SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || + SimpleTy == MVT::v32f8e4m3 || SimpleTy == MVT::v32f8e5m2 || + SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 || + SimpleTy == MVT::v256i1 || SimpleTy == MVT::v128i2 || SimpleTy == MVT::v64i4); } /// Return true if this is a 512-bit vector type. bool is512BitVector() const { - return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v32bf16 || - SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || - SimpleTy == MVT::v512i1 || SimpleTy == MVT::v256i2 || - SimpleTy == MVT::v128i4 || SimpleTy == MVT::v64i8 || - SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || + return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v32bf16 || + SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || + SimpleTy == MVT::v512i1 || SimpleTy == MVT::v256i2 || + SimpleTy == MVT::v128i4 || SimpleTy == MVT::v64i8 || + SimpleTy == MVT::v64f8e4m3 || SimpleTy == MVT::v64f8e5m2 || + SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64); } /// Return true if this is a 1024-bit vector type. 
bool is1024BitVector() const { - return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 || - SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 || - SimpleTy == MVT::v16i64 || SimpleTy == MVT::v64f16 || - SimpleTy == MVT::v32f32 || SimpleTy == MVT::v16f64 || + return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 || + SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 || + SimpleTy == MVT::v128f8e4m3 || SimpleTy == MVT::v128f8e5m2 || + SimpleTy == MVT::v16i64 || SimpleTy == MVT::v64f16 || + SimpleTy == MVT::v32f32 || SimpleTy == MVT::v16f64 || SimpleTy == MVT::v64bf16); } @@ -645,6 +683,32 @@ case nxv16i64: case nxv32i64: return i64; case v1i128: return i128; + case v1f8e4m3: + case v2f8e4m3: + case v3f8e4m3: + case v4f8e4m3: + case v8f8e4m3: + case v16f8e4m3: + case v32f8e4m3: + case v64f8e4m3: + case v128f8e4m3: + case nxv1f8e4m3: + case nxv2f8e4m3: + case nxv4f8e4m3: + case nxv8f8e4m3: return f8e4m3; + case v1f8e5m2: + case v2f8e5m2: + case v3f8e5m2: + case v4f8e5m2: + case v8f8e5m2: + case v16f8e5m2: + case v32f8e5m2: + case v64f8e5m2: + case v128f8e5m2: + case nxv1f8e5m2: + case nxv2f8e5m2: + case nxv4f8e5m2: + case nxv8f8e5m2: return f8e5m2; case v1f16: case v2f16: case v3f16: @@ -753,6 +817,8 @@ case v128i16: case v128i32: case v128i64: + case v128f8e4m3: + case v128f8e5m2: case v128f16: case v128bf16: case v128f32: @@ -763,6 +829,8 @@ case v64i16: case v64i32: case v64i64: + case v64f8e4m3: + case v64f8e5m2: case v64f16: case v64bf16: case v64f32: @@ -783,6 +851,8 @@ case nxv32i16: case nxv32i32: case nxv32i64: + case v32f8e4m3: + case v32f8e5m2: case nxv32f16: case nxv32bf16: return 32; case v16i1: @@ -799,6 +869,8 @@ case nxv16i16: case nxv16i32: case nxv16i64: + case v16f8e4m3: + case v16f8e5m2: case nxv16f16: case nxv16bf16: case nxv16f32: return 16; @@ -816,6 +888,8 @@ case v8i32: case v8i64: case v8f16: + case v8f8e4m3: + case v8f8e5m2: case v8bf16: case v8f32: case v8f64: @@ -824,6 +898,8 @@ case nxv8i16: case nxv8i32: case nxv8i64: + case 
nxv8f8e4m3: + case nxv8f8e5m2: case nxv8f16: case nxv8bf16: case nxv8f32: @@ -839,6 +915,8 @@ case v4i16: case v4i32: case v4i64: + case v4f8e4m3: + case v4f8e5m2: case v4f16: case v4bf16: case v4f32: @@ -848,6 +926,8 @@ case nxv4i16: case nxv4i32: case nxv4i64: + case nxv4f8e4m3: + case nxv4f8e5m2: case nxv4f16: case nxv4bf16: case nxv4f32: @@ -855,6 +935,8 @@ case v3i16: case v3i32: case v3i64: + case v3f8e4m3: + case v3f8e5m2: case v3f16: case v3bf16: case v3f32: @@ -866,6 +948,8 @@ case v2i64: case v2f16: case v2bf16: + case v2f8e4m3: + case v2f8e5m2: case v2f32: case v2f64: case nxv2i1: @@ -873,6 +957,8 @@ case nxv2i16: case nxv2i32: case nxv2i64: + case nxv2f8e4m3: + case nxv2f8e5m2: case nxv2f16: case nxv2bf16: case nxv2f32: @@ -883,6 +969,8 @@ case v1i32: case v1i64: case v1i128: + case v1f8e4m3: + case v1f8e5m2: case v1f16: case v1f32: case v1f64: @@ -891,6 +979,8 @@ case nxv1i16: case nxv1i32: case nxv1i64: + case nxv1f8e4m3: + case nxv1f8e5m2: case nxv1f16: case nxv1bf16: case nxv1f32: @@ -946,25 +1036,39 @@ case nxv4i1: return TypeSize::Scalable(4); case i8 : case v1i8: - case v8i1: return TypeSize::Fixed(8); + case v8i1: + case f8e4m3: + case f8e5m2: + case v1f8e4m3: + case v1f8e5m2: return TypeSize::Fixed(8); case nxv1i8: - case nxv8i1: return TypeSize::Scalable(8); + case nxv8i1: + case nxv1f8e4m3: + case nxv1f8e5m2: return TypeSize::Scalable(8); case i16 : case f16: case bf16: case v16i1: case v2i8: + case v2f8e4m3: + case v2f8e5m2: case v1i16: case v1f16: return TypeSize::Fixed(16); case nxv16i1: case nxv2i8: case nxv1i16: case nxv1bf16: + case nxv2f8e4m3: + case nxv2f8e5m2: case nxv1f16: return TypeSize::Scalable(16); + case v3f8e4m3: + case v3f8e5m2: return TypeSize::Fixed(24); /* fixed-length vectors of three 8-bit elements, so the size is Fixed like the other non-nxv cases */ case f32 : case i32 : case v32i1: case v4i8: + case v4f8e4m3: + case v4f8e5m2: case v2i16: case v2f16: case v2bf16: @@ -974,6 +1078,8 @@ case nxv4i8: case nxv2i16: case nxv1i32: + case nxv4f8e4m3: + case nxv4f8e5m2: case nxv2f16: case nxv2bf16: case nxv1f32: return
TypeSize::Scalable(32); @@ -988,6 +1094,8 @@ case v4i16: case v2i32: case v1i64: + case v8f8e4m3: + case v8f8e5m2: case v4f16: case v4bf16: case v2f32: @@ -997,6 +1105,8 @@ case nxv4i16: case nxv2i32: case nxv1i64: + case nxv8f8e4m3: + case nxv8f8e5m2: case nxv4f16: case nxv4bf16: case nxv2f32: @@ -1013,6 +1123,8 @@ case v4i32: case v2i64: case v1i128: + case v16f8e4m3: + case v16f8e5m2: case v8f16: case v8bf16: case v4f32: @@ -1040,6 +1152,8 @@ case v16i16: case v8i32: case v4i64: + case v32f8e4m3: + case v32f8e5m2: case v16f16: case v16bf16: case v8f32: @@ -1068,6 +1182,8 @@ case v32i16: case v16i32: case v8i64: + case v64f8e4m3: + case v64f8e5m2: case v32f16: case v32bf16: case v16f32: @@ -1085,6 +1201,8 @@ case v64i16: case v32i32: case v16i64: + case v128f8e4m3: + case v128f8e5m2: case v64f16: case v64bf16: case v32f32: @@ -1224,6 +1342,8 @@ switch (BitWidth) { default: llvm_unreachable("Bad bit width!"); + case 8: + return MVT::f8e4m3; // or should we consider f8e5m2 case 16: return MVT::f16; case 32: @@ -1350,6 +1470,28 @@ case MVT::i128: if (NumElements == 1) return MVT::v1i128; break; + case MVT::f8e4m3: + if (NumElements == 1) return MVT::v1f8e4m3; + if (NumElements == 2) return MVT::v2f8e4m3; + if (NumElements == 3) return MVT::v3f8e4m3; + if (NumElements == 4) return MVT::v4f8e4m3; + if (NumElements == 8) return MVT::v8f8e4m3; + if (NumElements == 16) return MVT::v16f8e4m3; + if (NumElements == 32) return MVT::v32f8e4m3; + if (NumElements == 64) return MVT::v64f8e4m3; + if (NumElements == 128) return MVT::v128f8e4m3; + break; + case MVT::f8e5m2: + if (NumElements == 1) return MVT::v1f8e5m2; + if (NumElements == 2) return MVT::v2f8e5m2; + if (NumElements == 3) return MVT::v3f8e5m2; + if (NumElements == 4) return MVT::v4f8e5m2; + if (NumElements == 8) return MVT::v8f8e5m2; + if (NumElements == 16) return MVT::v16f8e5m2; + if (NumElements == 32) return MVT::v32f8e5m2; + if (NumElements == 64) return MVT::v64f8e5m2; + if (NumElements == 128) return 
MVT::v128f8e5m2; + break; case MVT::f16: if (NumElements == 1) return MVT::v1f16; if (NumElements == 2) return MVT::v2f16; @@ -1458,6 +1600,18 @@ if (NumElements == 16) return MVT::nxv16i64; if (NumElements == 32) return MVT::nxv32i64; break; + case MVT::f8e4m3: + if (NumElements == 1) return MVT::nxv1f8e4m3; + if (NumElements == 2) return MVT::nxv2f8e4m3; + if (NumElements == 4) return MVT::nxv4f8e4m3; + if (NumElements == 8) return MVT::nxv8f8e4m3; + break; + case MVT::f8e5m2: + if (NumElements == 1) return MVT::nxv1f8e5m2; + if (NumElements == 2) return MVT::nxv2f8e5m2; + if (NumElements == 4) return MVT::nxv4f8e5m2; + if (NumElements == 8) return MVT::nxv8f8e5m2; + break; case MVT::f16: if (NumElements == 1) return MVT::nxv1f16; if (NumElements == 2) return MVT::nxv2f16; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -805,6 +805,8 @@ TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context)); TYPEKEYWORD("token", Type::getTokenTy(Context)); + TYPEKEYWORD("f8e5m2", Type::getF8E5M2Ty(Context)); + TYPEKEYWORD("f8e4m3", Type::getF8E4M3Ty(Context)); if (Keyword == "ptr") { // setOpaquePointers() must be called before creating any pointer types. 
@@ -974,12 +976,13 @@ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ /// HexHalfConstant 0xH[0-9A-Fa-f]+ /// HexBFloatConstant 0xR[0-9A-Fa-f]+ +/// HexFP8Constant 0xQ[0-9A-Fa-f]+ lltok::Kind LLLexer::Lex0x() { CurPtr = TokStart + 2; char Kind; if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || - CurPtr[0] == 'R') { + CurPtr[0] == 'R' || CurPtr[0] == 'Q') { Kind = *CurPtr++; } else { Kind = 'J'; @@ -1030,6 +1033,11 @@ APFloatVal = APFloat(APFloat::BFloat(), APInt(16, HexIntToVal(TokStart + 3, CurPtr))); return lltok::APFloat; + case 'Q': + // FP8: always lexed with Float8E5M2 semantics; the hex digits start after the 3-character "0xQ" prefix, as in the BFloat case above. + APFloatVal = APFloat(APFloat::Float8E5M2(), + APInt(8, HexIntToVal(TokStart + 3, CurPtr))); + return lltok::APFloat; } } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -5642,6 +5642,15 @@ ID.APFloatVal.isNegative(), &Payload); } } + // // There is only one defined FP8 literal type, Float8E5M2 (e.g. 0xQ23) + // // For the case the literal is of type Float8E5M2, and the assigned var is + // // Float8E4M3, convert the type.
+ // if (&ID.APFloatVal.getSemantics() == &APFloat::Float8E5M2() && + // Ty->isF8E4M3()) { + // bool Ignored; + // ID.APFloatVal.convert(APFloat::Float8E4M3FN(), + // APFloat::rmNearestTiesToEven, &Ignored); + // } V = ConstantFP::get(Context, ID.APFloatVal); if (V->getType() != Ty) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2302,9 +2302,15 @@ case bitc::TYPE_CODE_VOID: // VOID ResultTy = Type::getVoidTy(Context); break; - case bitc::TYPE_CODE_HALF: // HALF + case bitc::TYPE_CODE_HALF: // HALF ResultTy = Type::getHalfTy(Context); break; + case bitc::TYPE_CODE_F8E4M3: // F8E4M3 + ResultTy = Type::getF8E4M3Ty(Context); + break; + case bitc::TYPE_CODE_F8E5M2: // F8E5M2 + ResultTy = Type::getF8E5M2Ty(Context); + break; case bitc::TYPE_CODE_BFLOAT: // BFLOAT ResultTy = Type::getBFloatTy(Context); break; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -960,6 +960,12 @@ switch (T->getTypeID()) { case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; + case Type::F8E4M3TyID: + Code = bitc::TYPE_CODE_F8E4M3; + break; + case Type::F8E5M2TyID: + Code = bitc::TYPE_CODE_F8E5M2; + break; case Type::BFloatTyID: Code = bitc::TYPE_CODE_BFLOAT; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -194,6 +194,8 @@ case MVT::i32: return Type::getInt32Ty(Context); case MVT::i64: return Type::getInt64Ty(Context); case MVT::i128: return IntegerType::get(Context, 128); + case MVT::f8e4m3: return
Type::getF8E4M3Ty(Context); + case MVT::f8e5m2: return Type::getF8E5M2Ty(Context); case MVT::f16: return Type::getHalfTy(Context); case MVT::bf16: return Type::getBFloatTy(Context); case MVT::f32: return Type::getFloatTy(Context); @@ -348,6 +350,42 @@ return FixedVectorType::get(Type::getInt64Ty(Context), 256); case MVT::v1i128: return FixedVectorType::get(Type::getInt128Ty(Context), 1); + case MVT::v1f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 1); + case MVT::v2f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 2); + case MVT::v3f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 3); + case MVT::v4f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 4); + case MVT::v8f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 8); + case MVT::v16f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 16); + case MVT::v32f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 32); + case MVT::v64f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 64); + case MVT::v128f8e4m3 : + return FixedVectorType::get(Type::getF8E4M3Ty(Context), 128); + case MVT::v1f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 1); + case MVT::v2f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 2); + case MVT::v3f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 3); + case MVT::v4f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 4); + case MVT::v8f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 8); + case MVT::v16f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 16); + case MVT::v32f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 32); + case MVT::v64f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 64); + case MVT::v128f8e5m2 : + return FixedVectorType::get(Type::getF8E5M2Ty(Context), 128); case MVT::v1f16: return 
FixedVectorType::get(Type::getHalfTy(Context), 1); case MVT::v2f16: @@ -510,6 +548,22 @@ return ScalableVectorType::get(Type::getInt64Ty(Context), 16); case MVT::nxv32i64: return ScalableVectorType::get(Type::getInt64Ty(Context), 32); + case MVT::nxv1f8e4m3: + return ScalableVectorType::get(Type::getF8E4M3Ty(Context), 1); + case MVT::nxv2f8e4m3: + return ScalableVectorType::get(Type::getF8E4M3Ty(Context), 2); + case MVT::nxv4f8e4m3: + return ScalableVectorType::get(Type::getF8E4M3Ty(Context), 4); + case MVT::nxv8f8e4m3: + return ScalableVectorType::get(Type::getF8E4M3Ty(Context), 8); + case MVT::nxv1f8e5m2: + return ScalableVectorType::get(Type::getF8E5M2Ty(Context), 1); + case MVT::nxv2f8e5m2: + return ScalableVectorType::get(Type::getF8E5M2Ty(Context), 2); + case MVT::nxv4f8e5m2: + return ScalableVectorType::get(Type::getF8E5M2Ty(Context), 4); + case MVT::nxv8f8e5m2: + return ScalableVectorType::get(Type::getF8E5M2Ty(Context), 8); case MVT::nxv1f16: return ScalableVectorType::get(Type::getHalfTy(Context), 1); case MVT::nxv2f16: @@ -569,6 +623,10 @@ return MVT::isVoid; case Type::IntegerTyID: return getIntegerVT(cast(Ty)->getBitWidth()); + case Type::F8E4M3TyID: + return MVT(MVT::f8e4m3); + case Type::F8E5M2TyID: + return MVT(MVT::f8e5m2); case Type::HalfTyID: return MVT(MVT::f16); case Type::BFloatTyID: return MVT(MVT::bf16); case Type::FloatTyID: return MVT(MVT::f32); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -537,6 +537,12 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { switch (Ty->getTypeID()) { case Type::VoidTyID: OS << "void"; return; + case Type::F8E4M3TyID: + OS << "f8e4m3"; + return; + case Type::F8E5M2TyID: + OS << "f8e5m2"; + return; case Type::HalfTyID: OS << "half"; return; case Type::BFloatTyID: OS << "bfloat"; return; case Type::FloatTyID: OS << "float"; return; @@ -1434,6 +1440,11 @@ Out << 'R'; Out << format_hex_no_prefix(API.getZExtValue(), 4, 
/*Upper=*/true); + } else if (&APF.getSemantics() == &APFloat::Float8E5M2() || + &APF.getSemantics() == &APFloat::Float8E4M3FN()) { + Out << 'Q'; + Out << format_hex_no_prefix(API.getZExtValue(), 2, + /*Upper=*/true); } else llvm_unreachable("Unsupported floating point type"); return; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -351,6 +351,8 @@ switch (Ty->getTypeID()) { case Type::IntegerTyID: return ConstantInt::get(Ty, 0); + case Type::F8E4M3TyID: + case Type::F8E5M2TyID: case Type::HalfTyID: case Type::BFloatTyID: case Type::FloatTyID: @@ -1538,6 +1540,20 @@ Val2.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } + case Type::F8E4M3TyID: { + if (&Val2.getSemantics() == &APFloat::Float8E4M3FN()) + return true; + Val2.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; + } + case Type::F8E5M2TyID: { + if (&Val2.getSemantics() == &APFloat::Float8E5M2()) + return true; + Val2.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; + } case Type::BFloatTyID: { if (&Val2.getSemantics() == &APFloat::BFloat()) return true; @@ -3146,6 +3162,14 @@ auto EltVal = *reinterpret_cast<const uint16_t *>(EltPtr); return APFloat(APFloat::IEEEhalf(), APInt(16, EltVal)); } + case Type::F8E4M3TyID: { + auto EltVal = *reinterpret_cast<const uint8_t *>(EltPtr); + return APFloat(APFloat::Float8E4M3FN(), APInt(8, EltVal)); + } + case Type::F8E5M2TyID: { + auto EltVal = *reinterpret_cast<const uint8_t *>(EltPtr); + return APFloat(APFloat::Float8E5M2(), APInt(8, EltVal)); + } case Type::BFloatTyID: { auto EltVal = *reinterpret_cast<const uint16_t *>(EltPtr); return APFloat(APFloat::BFloat(), APInt(16, EltVal)); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -491,6 +491,10 @@ switch (unwrap(Ty)->getTypeID()) { case Type::VoidTyID: return LLVMVoidTypeKind; + case
Type::F8E5M2TyID: + return LLVMFloatE5M2TypeKind; + case Type::F8E4M3TyID: + return LLVMFloatE4M3TypeKind; case Type::HalfTyID: return LLVMHalfTypeKind; case Type::BFloatTyID: @@ -614,6 +618,12 @@ /*--.. Operations on real types ............................................--*/ +LLVMTypeRef LLVMFloatE4M3TypeInContext(LLVMContextRef C) { + return (LLVMTypeRef)Type::getF8E4M3Ty(*unwrap(C)); +} +LLVMTypeRef LLVMFloatE5M2TypeInContext(LLVMContextRef C) { + return (LLVMTypeRef)Type::getF8E5M2Ty(*unwrap(C)); +} LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getHalfTy(*unwrap(C)); } @@ -641,7 +651,12 @@ LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_AMXTy(*unwrap(C)); } - +LLVMTypeRef LLVMFloatE4M3Type(void) { + return LLVMFloatE4M3TypeInContext(LLVMGetGlobalContext()); +} +LLVMTypeRef LLVMFloatE5M2Type(void) { + return LLVMFloatE5M2TypeInContext(LLVMGetGlobalContext()); +} LLVMTypeRef LLVMHalfType(void) { return LLVMHalfTypeInContext(LLVMGetGlobalContext()); } @@ -1407,7 +1422,7 @@ Type *Ty = cFP->getType(); if (Ty->isHalfTy() || Ty->isBFloatTy() || Ty->isFloatTy() || - Ty->isDoubleTy()) { + Ty->isDoubleTy() || Ty->isF8E5M2() || Ty->isF8E4M3()) { *LosesInfo = false; return cFP->getValueAPF().convertToDouble(); } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -774,6 +774,8 @@ } case Type::IntegerTyID: return getIntegerAlignment(Ty->getIntegerBitWidth(), abi_or_pref); + case Type::F8E4M3TyID: + case Type::F8E5M2TyID: case Type::HalfTyID: case Type::BFloatTyID: case Type::FloatTyID: diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -917,6 +917,12 @@ default: llvm_unreachable("Unhandled type"); case Type::VoidTyID: Result += "isVoid"; break; case Type::MetadataTyID: Result += "Metadata"; break; + case Type::F8E4M3TyID: + Result += 
"f8e4m3"; + break; + case Type::F8E5M2TyID: + Result += "f8e5m2"; + break; case Type::HalfTyID: Result += "f16"; break; case Type::BFloatTyID: Result += "bf16"; break; case Type::FloatTyID: Result += "f32"; break; diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1463,7 +1463,7 @@ // Basic type instances. Type VoidTy, LabelTy, HalfTy, BFloatTy, FloatTy, DoubleTy, MetadataTy, - TokenTy; + TokenTy, F8E5M2Ty, F8E4M3Ty; Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -41,6 +41,7 @@ : DiagHandler(std::make_unique<DiagnosticHandler>()), VoidTy(C, Type::VoidTyID), LabelTy(C, Type::LabelTyID), HalfTy(C, Type::HalfTyID), BFloatTy(C, Type::BFloatTyID), FloatTy(C, Type::FloatTyID), DoubleTy(C, Type::DoubleTyID), MetadataTy(C, Type::MetadataTyID), TokenTy(C, Type::TokenTyID), + F8E5M2Ty(C, Type::F8E5M2TyID), F8E4M3Ty(C, Type::F8E4M3TyID), X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -49,6 +49,10 @@ case X86_MMXTyID : return getX86_MMXTy(C); case X86_AMXTyID : return getX86_AMXTy(C); case TokenTyID : return getTokenTy(C); + case F8E4M3TyID: + return getF8E4M3Ty(C); + case F8E5M2TyID: + return getF8E5M2Ty(C); default: return nullptr; } @@ -66,6 +70,10 @@ const fltSemantics &Type::getFltSemantics() const { switch (getTypeID()) { + case F8E4M3TyID: + return APFloat::Float8E4M3FN(); + case F8E5M2TyID: + return APFloat::Float8E5M2(); case HalfTyID: return APFloat::IEEEhalf(); case BFloatTyID: return APFloat::BFloat(); case FloatTyID: return APFloat::IEEEsingle(); @@ -87,6 +95,10 @@ Ty = Type::getHalfTy(C); else if (&S == &APFloat::BFloat())
Ty = Type::getBFloatTy(C); + else if (&S == &APFloat::Float8E5M2()) + Ty = Type::getF8E5M2Ty(C); + else if (&S == &APFloat::Float8E4M3FN()) + Ty = Type::getF8E4M3Ty(C); else if (&S == &APFloat::IEEEsingle()) Ty = Type::getFloatTy(C); else if (&S == &APFloat::IEEEdouble()) @@ -163,6 +175,10 @@ TypeSize Type::getPrimitiveSizeInBits() const { switch (getTypeID()) { + case Type::F8E4M3TyID: + return TypeSize::Fixed(8); + case Type::F8E5M2TyID: + return TypeSize::Fixed(8); case Type::HalfTyID: return TypeSize::Fixed(16); case Type::BFloatTyID: return TypeSize::Fixed(16); case Type::FloatTyID: return TypeSize::Fixed(32); @@ -196,6 +212,10 @@ return VTy->getElementType()->getFPMantissaWidth(); assert(isFloatingPointTy() && "Not a floating point type!"); if (getTypeID() == HalfTyID) return 11; + if (getTypeID() == F8E4M3TyID) + return 4; /* 3 stored bits plus the implicit bit, as for half (11) and float (24) */ + if (getTypeID() == F8E5M2TyID) + return 3; /* 2 stored bits plus the implicit bit */ if (getTypeID() == BFloatTyID) return 8; if (getTypeID() == FloatTyID) return 24; if (getTypeID() == DoubleTyID) return 53; @@ -222,6 +242,8 @@ Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; } Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; } Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; } +Type *Type::getF8E5M2Ty(LLVMContext &C) { return &C.pImpl->F8E5M2Ty; } +Type *Type::getF8E4M3Ty(LLVMContext &C) { return &C.pImpl->F8E4M3Ty; } Type *Type::getBFloatTy(LLVMContext &C) { return &C.pImpl->BFloatTy; } Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; } Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; } @@ -248,6 +270,14 @@ return getHalfTy(C)->getPointerTo(AS); } +PointerType *Type::getF8E5M2PtrTy(LLVMContext &C, unsigned AS) { + return getF8E5M2Ty(C)->getPointerTo(AS); +} + +PointerType *Type::getF8E4M3PtrTy(LLVMContext &C, unsigned AS) { + return getF8E4M3Ty(C)->getPointerTo(AS); +} + PointerType *Type::getBFloatPtrTy(LLVMContext &C, unsigned AS) { return getBFloatTy(C)->getPointerTo(AS); } diff --git
a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -333,6 +333,8 @@ case Type::X86_AMXTyID: case Type::TokenTyID: case Type::TypedPointerTyID: + case Type::F8E4M3TyID: + case Type::F8E5M2TyID: return 0; } diff --git a/llvm/test/Assembler/fp8.ll b/llvm/test/Assembler/fp8.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Assembler/fp8.ll @@ -0,0 +1,61 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s --check-prefix=ASSEM-DISASS +; RUN: opt < %s -O3 -S | FileCheck %s --check-prefix=OPT +; RUN: verify-uselistorder %s +; Basic smoke tests for f8e4m3 and f8e5m2 types. + +define f8e4m3 @check_f8e4m3(f8e4m3 %A) { +; ASSEM-DISASS: ret f8e4m3 %A + ret f8e4m3 %A +} + +define f8e4m3 @check_f8e4m3_literal() { +; ASSEM-DISASS: ret f8e4m3 0xQ31 + ret f8e4m3 0xQ31 +} + +define <4 x f8e4m3> @check_fixed_vector_f8e4m3() { +; ASSEM-DISASS: ret <4 x f8e4m3> %tmp + %tmp = fadd <4 x f8e4m3> undef, undef + ret <4 x f8e4m3> %tmp +} + +define <vscale x 4 x f8e4m3> @check_vector_f8e4m3() { +; ASSEM-DISASS: ret <vscale x 4 x f8e4m3> %tmp + %tmp = fadd <vscale x 4 x f8e4m3> undef, undef + ret <vscale x 4 x f8e4m3> %tmp +} + +define f8e4m3 @check_f8e4m3_constprop() { + %tmp = fadd f8e4m3 0xQ40, 0xQ40 +; OPT: 0xQ48 + ret f8e4m3 %tmp +} + +define f8e5m2 @check_f8e5m2(f8e5m2 %A) { +; ASSEM-DISASS: ret f8e5m2 %A + ret f8e5m2 %A +} + +define f8e5m2 @check_f8e5m2_literal() { +; ASSEM-DISASS: ret f8e5m2 0xQ31 + ret f8e5m2 0xQ31 +} + +define <4 x f8e5m2> @check_fixed_vector_f8e5m2() { +; ASSEM-DISASS: ret <4 x f8e5m2> %tmp + %tmp = fadd <4 x f8e5m2> undef, undef + ret <4 x f8e5m2> %tmp +} + +define <vscale x 4 x f8e5m2> @check_vector_f8e5m2() { +; ASSEM-DISASS: ret <vscale x 4 x f8e5m2> %tmp + %tmp = fadd <vscale x 4 x f8e5m2> undef, undef + ret <vscale x 4 x f8e5m2> %tmp +} + +define f8e5m2 @check_f8e5m2_constprop() { + %tmp = fadd f8e5m2 0xQ40, 0xQ40 +; OPT: 0xQ44 + ret f8e5m2 %tmp +} + diff --git a/llvm/tools/llvm-c-test/echo.cpp b/llvm/tools/llvm-c-test/echo.cpp ---
a/llvm/tools/llvm-c-test/echo.cpp +++ b/llvm/tools/llvm-c-test/echo.cpp @@ -73,6 +73,10 @@ switch (Kind) { case LLVMVoidTypeKind: return LLVMVoidTypeInContext(Ctx); + case LLVMFloatE4M3TypeKind: + return LLVMFloatE4M3TypeInContext(Ctx); + case LLVMFloatE5M2TypeKind: + return LLVMFloatE5M2TypeInContext(Ctx); case LLVMHalfTypeKind: return LLVMHalfTypeInContext(Ctx); case LLVMBFloatTypeKind: