diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -28,217 +28,218 @@
 def i64 : ValueType<64, 8>; // 64-bit integer value
 def i128 : ValueType<128, 9>; // 128-bit integer value
-def bf16 : ValueType<16, 10>; // 16-bit brain floating point value
-def f16 : ValueType<16, 11>; // 16-bit floating point value
-def f32 : ValueType<32, 12>; // 32-bit floating point value
-def f64 : ValueType<64, 13>; // 64-bit floating point value
-def f80 : ValueType<80, 14>; // 80-bit floating point value
-def f128 : ValueType<128, 15>; // 128-bit floating point value
-def ppcf128 : ValueType<128, 16>; // PPC 128-bit floating point value
-
-def v1i1 : ValueType<1, 17>; // 1 x i1 vector value
-def v2i1 : ValueType<2, 18>; // 2 x i1 vector value
-def v4i1 : ValueType<4, 19>; // 4 x i1 vector value
-def v8i1 : ValueType<8, 20>; // 8 x i1 vector value
-def v16i1 : ValueType<16, 21>; // 16 x i1 vector value
-def v32i1 : ValueType<32, 22>; // 32 x i1 vector value
-def v64i1 : ValueType<64, 23>; // 64 x i1 vector value
-def v128i1 : ValueType<128, 24>; // 128 x i1 vector value
-def v256i1 : ValueType<256, 25>; // 256 x i1 vector value
-def v512i1 : ValueType<512, 26>; // 512 x i1 vector value
-def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value
-def v2048i1 : ValueType<2048, 28>; // 2048 x i1 vector value
-
-def v128i2 : ValueType<256, 29>; // 128 x i2 vector value
-def v256i2 : ValueType<512, 30>; // 256 x i2 vector value
-
-def v64i4 : ValueType<256, 31>; // 64 x i4 vector value
-def v128i4 : ValueType<512, 32>; // 128 x i4 vector value
-
-def v1i8 : ValueType<8, 33>; // 1 x i8 vector value
-def v2i8 : ValueType<16, 34>; // 2 x i8 vector value
-def v4i8 : ValueType<32, 35>; // 4 x i8 vector value
-def v8i8 : ValueType<64, 36>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 37>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 38>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 39>; // 64 x i8 vector value
-def v128i8 : ValueType<1024, 40>; // 128 x i8 vector value
-def v256i8 : ValueType<2048, 41>; // 256 x i8 vector value
-def v512i8 : ValueType<4096, 42>; // 512 x i8 vector value
-def v1024i8 : ValueType<8192, 43>; // 1024 x i8 vector value
-
-def v1i16 : ValueType<16, 44>; // 1 x i16 vector value
-def v2i16 : ValueType<32, 45>; // 2 x i16 vector value
-def v3i16 : ValueType<48, 46>; // 3 x i16 vector value
-def v4i16 : ValueType<64, 47>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 48>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 49>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 50>; // 32 x i16 vector value
-def v64i16 : ValueType<1024, 51>; // 64 x i16 vector value
-def v128i16 : ValueType<2048, 52>; // 128 x i16 vector value
-def v256i16 : ValueType<4096, 53>; // 256 x i16 vector value
-def v512i16 : ValueType<8192, 54>; // 512 x i16 vector value
-
-def v1i32 : ValueType<32, 55>; // 1 x i32 vector value
-def v2i32 : ValueType<64, 56>; // 2 x i32 vector value
-def v3i32 : ValueType<96, 57>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 58>; // 4 x i32 vector value
-def v5i32 : ValueType<160, 59>; // 5 x i32 vector value
-def v6i32 : ValueType<192, 60>; // 6 x f32 vector value
-def v7i32 : ValueType<224, 61>; // 7 x f32 vector value
-def v8i32 : ValueType<256, 62>; // 8 x i32 vector value
-def v9i32 : ValueType<288, 63>; // 9 x i32 vector value
-def v10i32 : ValueType<320, 64>; // 10 x i32 vector value
-def v11i32 : ValueType<352, 65>; // 11 x i32 vector value
-def v12i32 : ValueType<384, 66>; // 12 x i32 vector value
-def v16i32 : ValueType<512, 67>; // 16 x i32 vector value
-def v32i32 : ValueType<1024, 68>; // 32 x i32 vector value
-def v64i32 : ValueType<2048, 69>; // 64 x i32 vector value
-def v128i32 : ValueType<4096, 70>; // 128 x i32 vector value
-def v256i32 : ValueType<8192, 71>; // 256 x i32 vector value
-def v512i32 : ValueType<16384, 72>; // 512 x i32 vector value
-def v1024i32 : ValueType<32768, 73>; // 1024 x i32 vector value
-def v2048i32 : ValueType<65536, 74>; // 2048 x i32 vector value
-
-def v1i64 : ValueType<64, 75>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 76>; // 2 x i64 vector value
-def v3i64 : ValueType<192, 77>; // 3 x i64 vector value
-def v4i64 : ValueType<256, 78>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 79>; // 8 x i64 vector value
-def v16i64 : ValueType<1024, 80>; // 16 x i64 vector value
-def v32i64 : ValueType<2048, 81>; // 32 x i64 vector value
-def v64i64 : ValueType<4096, 82>; // 64 x i64 vector value
-def v128i64 : ValueType<8192, 83>; // 128 x i64 vector value
-def v256i64 : ValueType<16384, 84>; // 256 x i64 vector value
-
-def v1i128 : ValueType<128, 85>; // 1 x i128 vector value
-
-def v1f16 : ValueType<16, 86>; // 1 x f16 vector value
-def v2f16 : ValueType<32, 87>; // 2 x f16 vector value
-def v3f16 : ValueType<48, 88>; // 3 x f16 vector value
-def v4f16 : ValueType<64, 89>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 90>; // 8 x f16 vector value
-def v16f16 : ValueType<256, 91>; // 16 x f16 vector value
-def v32f16 : ValueType<512, 92>; // 32 x f16 vector value
-def v64f16 : ValueType<1024, 93>; // 64 x f16 vector value
-def v128f16 : ValueType<2048, 94>; // 128 x f16 vector value
-def v256f16 : ValueType<4096, 95>; // 256 x f16 vector value
-def v512f16 : ValueType<8192, 96>; // 512 x f16 vector value
-
-def v2bf16 : ValueType<32, 97>; // 2 x bf16 vector value
-def v3bf16 : ValueType<48, 98>; // 3 x bf16 vector value
-def v4bf16 : ValueType<64, 99>; // 4 x bf16 vector value
-def v8bf16 : ValueType<128, 100>; // 8 x bf16 vector value
-def v16bf16 : ValueType<256, 101>; // 16 x bf16 vector value
-def v32bf16 : ValueType<512, 102>; // 32 x bf16 vector value
-def v64bf16 : ValueType<1024, 103>; // 64 x bf16 vector value
-def v128bf16 : ValueType<2048, 104>; // 128 x bf16 vector value
-
-def v1f32 : ValueType<32, 105>; // 1 x f32 vector value
-def v2f32 : ValueType<64, 106>; // 2 x f32 vector value
-def v3f32 : ValueType<96, 107>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 108>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 109>; // 5 x f32 vector value
-def v6f32 : ValueType<192, 110>; // 6 x f32 vector value
-def v7f32 : ValueType<224, 111>; // 7 x f32 vector value
-def v8f32 : ValueType<256, 112>; // 8 x f32 vector value
-def v9f32 : ValueType<288, 113>; // 9 x f32 vector value
-def v10f32 : ValueType<320, 114>; // 10 x f32 vector value
-def v11f32 : ValueType<352, 115>; // 11 x f32 vector value
-def v12f32 : ValueType<384, 116>; // 12 x f32 vector value
-def v16f32 : ValueType<512, 117>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 118>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 119>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 120>; // 128 x f32 vector value
-def v256f32 : ValueType<8192, 121>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 122>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 123>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 124>; // 2048 x f32 vector value
-
-def v1f64 : ValueType<64, 125>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 126>; // 2 x f64 vector value
-def v3f64 : ValueType<192, 127>; // 3 x f64 vector value
-def v4f64 : ValueType<256, 128>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 129>; // 8 x f64 vector value
-def v16f64 : ValueType<1024, 130>; // 16 x f64 vector value
-def v32f64 : ValueType<2048, 131>; // 32 x f64 vector value
-def v64f64 : ValueType<4096, 132>; // 64 x f64 vector value
-def v128f64 : ValueType<8192, 133>; // 128 x f64 vector value
-def v256f64 : ValueType<16384, 134>; // 256 x f64 vector value
-
-def nxv1i1 : ValueType<1, 135>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 136>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 137>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 138>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 139>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 140>; // n x 32 x i1 vector value
-def nxv64i1 : ValueType<64, 141>; // n x 64 x i1 vector value
-
-def nxv1i8 : ValueType<8, 142>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 143>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 144>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 145>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 146>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 147>; // n x 32 x i8 vector value
-def nxv64i8 : ValueType<512, 148>; // n x 64 x i8 vector value
-
-def nxv1i16 : ValueType<16, 149>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 150>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 151>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 152>; // n x 8 x i16 vector value
-def nxv16i16 : ValueType<256, 153>; // n x 16 x i16 vector value
-def nxv32i16 : ValueType<512, 154>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 155>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 156>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 157>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 158>; // n x 8 x i32 vector value
-def nxv16i32 : ValueType<512, 159>; // n x 16 x i32 vector value
-def nxv32i32 : ValueType<1024, 160>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 161>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 162>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 163>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 164>; // n x 8 x i64 vector value
-def nxv16i64 : ValueType<1024, 165>; // n x 16 x i64 vector value
-def nxv32i64 : ValueType<2048, 166>; // n x 32 x i64 vector value
-
-def nxv1f16 : ValueType<16, 167>; // n x 1 x f16 vector value
-def nxv2f16 : ValueType<32, 168>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64, 169>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 170>; // n x 8 x f16 vector value
-def nxv16f16 : ValueType<256, 171>; // n x 16 x f16 vector value
-def nxv32f16 : ValueType<512, 172>; // n x 32 x f16 vector value
-
-def nxv1bf16 : ValueType<16, 173>; // n x 1 x bf16 vector value
-def nxv2bf16 : ValueType<32, 174>; // n x 2 x bf16 vector value
-def nxv4bf16 : ValueType<64, 175>; // n x 4 x bf16 vector value
-def nxv8bf16 : ValueType<128, 176>; // n x 8 x bf16 vector value
-def nxv16bf16 : ValueType<256, 177>; // n x 16 x bf16 vector value
-def nxv32bf16 : ValueType<512, 178>; // n x 32 x bf16 vector value
-
-def nxv1f32 : ValueType<32, 179>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64, 180>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 181>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 182>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 183>; // n x 16 x f32 vector value
-
-def nxv1f64 : ValueType<64, 184>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 185>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 186>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 187>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64, 188>; // X86 MMX value
-def FlagVT : ValueType<0, 189>; // Pre-RA sched glue
-def isVoid : ValueType<0, 190>; // Produces no value
-def untyped : ValueType<8, 191>; // Produces an untyped value
-def funcref : ValueType<0, 192>; // WebAssembly's funcref type
-def externref : ValueType<0, 193>; // WebAssembly's externref type
-def x86amx : ValueType<8192, 194>; // X86 AMX value
-def i64x8 : ValueType<512, 195>; // 8 Consecutive GPRs (AArch64)
+def f8 : ValueType<8, 10>; // 8-bit floating point value
+def bf16 : ValueType<16, 11>; // 16-bit brain floating point value
+def f16 : ValueType<16, 12>; // 16-bit floating point value
+def f32 : ValueType<32, 13>; // 32-bit floating point value
+def f64 : ValueType<64, 14>; // 64-bit floating point value
+def f80 : ValueType<80, 15>; // 80-bit floating point value
+def f128 : ValueType<128, 16>; // 128-bit floating point value
+def ppcf128 : ValueType<128, 17>; // PPC 128-bit floating point value
+
+def v1i1 : ValueType<1, 18>; // 1 x i1 vector value
+def v2i1 : ValueType<2, 19>; // 2 x i1 vector value
+def v4i1 : ValueType<4, 20>; // 4 x i1 vector value
+def v8i1 : ValueType<8, 21>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 22>; // 16 x i1 vector value
+def v32i1 : ValueType<32, 23>; // 32 x i1 vector value
+def v64i1 : ValueType<64, 24>; // 64 x i1 vector value
+def v128i1 : ValueType<128, 25>; // 128 x i1 vector value
+def v256i1 : ValueType<256, 26>; // 256 x i1 vector value
+def v512i1 : ValueType<512, 27>; // 512 x i1 vector value
+def v1024i1 : ValueType<1024, 28>; // 1024 x i1 vector value
+def v2048i1 : ValueType<2048, 29>; // 2048 x i1 vector value
+
+def v128i2 : ValueType<256, 30>; // 128 x i2 vector value
+def v256i2 : ValueType<512, 31>; // 256 x i2 vector value
+
+def v64i4 : ValueType<256, 32>; // 64 x i4 vector value
+def v128i4 : ValueType<512, 33>; // 128 x i4 vector value
+
+def v1i8 : ValueType<8, 34>; // 1 x i8 vector value
+def v2i8 : ValueType<16, 35>; // 2 x i8 vector value
+def v4i8 : ValueType<32, 36>; // 4 x i8 vector value
+def v8i8 : ValueType<64, 37>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 38>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 39>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 40>; // 64 x i8 vector value
+def v128i8 : ValueType<1024, 41>; // 128 x i8 vector value
+def v256i8 : ValueType<2048, 42>; // 256 x i8 vector value
+def v512i8 : ValueType<4096, 43>; // 512 x i8 vector value
+def v1024i8 : ValueType<8192, 44>; // 1024 x i8 vector value
+
+def v1i16 : ValueType<16, 45>; // 1 x i16 vector value
+def v2i16 : ValueType<32, 46>; // 2 x i16 vector value
+def v3i16 : ValueType<48, 47>; // 3 x i16 vector value
+def v4i16 : ValueType<64, 48>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 49>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 50>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 51>; // 32 x i16 vector value
+def v64i16 : ValueType<1024, 52>; // 64 x i16 vector value
+def v128i16 : ValueType<2048, 53>; // 128 x i16 vector value
+def v256i16 : ValueType<4096, 54>; // 256 x i16 vector value
+def v512i16 : ValueType<8192, 55>; // 512 x i16 vector value
+
+def v1i32 : ValueType<32, 56>; // 1 x i32 vector value
+def v2i32 : ValueType<64, 57>; // 2 x i32 vector value
+def v3i32 : ValueType<96, 58>; // 3 x i32 vector value
+def v4i32 : ValueType<128, 59>; // 4 x i32 vector value
+def v5i32 : ValueType<160, 60>; // 5 x i32 vector value
+def v6i32 : ValueType<192, 61>; // 6 x i32 vector value
+def v7i32 : ValueType<224, 62>; // 7 x i32 vector value
+def v8i32 : ValueType<256, 63>; // 8 x i32 vector value
+def v9i32 : ValueType<288, 64>; // 9 x i32 vector value
+def v10i32 : ValueType<320, 65>; // 10 x i32 vector value
+def v11i32 : ValueType<352, 66>; // 11 x i32 vector value
+def v12i32 : ValueType<384, 67>; // 12 x i32 vector value
+def v16i32 : ValueType<512, 68>; // 16 x i32 vector value
+def v32i32 : ValueType<1024, 69>; // 32 x i32 vector value
+def v64i32 : ValueType<2048, 70>; // 64 x i32 vector value
+def v128i32 : ValueType<4096, 71>; // 128 x i32 vector value
+def v256i32 : ValueType<8192, 72>; // 256 x i32 vector value
+def v512i32 : ValueType<16384, 73>; // 512 x i32 vector value
+def v1024i32 : ValueType<32768, 74>; // 1024 x i32 vector value
+def v2048i32 : ValueType<65536, 75>; // 2048 x i32 vector value
+
+def v1i64 : ValueType<64, 76>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 77>; // 2 x i64 vector value
+def v3i64 : ValueType<192, 78>; // 3 x i64 vector value
+def v4i64 : ValueType<256, 79>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 80>; // 8 x i64 vector value
+def v16i64 : ValueType<1024, 81>; // 16 x i64 vector value
+def v32i64 : ValueType<2048, 82>; // 32 x i64 vector value
+def v64i64 : ValueType<4096, 83>; // 64 x i64 vector value
+def v128i64 : ValueType<8192, 84>; // 128 x i64 vector value
+def v256i64 : ValueType<16384, 85>; // 256 x i64 vector value
+
+def v1i128 : ValueType<128, 86>; // 1 x i128 vector value
+
+def v1f16 : ValueType<16, 87>; // 1 x f16 vector value
+def v2f16 : ValueType<32, 88>; // 2 x f16 vector value
+def v3f16 : ValueType<48, 89>; // 3 x f16 vector value
+def v4f16 : ValueType<64, 90>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 91>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 92>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 93>; // 32 x f16 vector value
+def v64f16 : ValueType<1024, 94>; // 64 x f16 vector value
+def v128f16 : ValueType<2048, 95>; // 128 x f16 vector value
+def v256f16 : ValueType<4096, 96>; // 256 x f16 vector value
+def v512f16 : ValueType<8192, 97>; // 512 x f16 vector value
+
+def v2bf16 : ValueType<32, 98>; // 2 x bf16 vector value
+def v3bf16 : ValueType<48, 99>; // 3 x bf16 vector value
+def v4bf16 : ValueType<64, 100>; // 4 x bf16 vector value
+def v8bf16 : ValueType<128, 101>; // 8 x bf16 vector value
+def v16bf16 : ValueType<256, 102>; // 16 x bf16 vector value
+def v32bf16 : ValueType<512, 103>; // 32 x bf16 vector value
+def v64bf16 : ValueType<1024, 104>; // 64 x bf16 vector value
+def v128bf16 : ValueType<2048, 105>; // 128 x bf16 vector value
+
+def v1f32 : ValueType<32, 106>; // 1 x f32 vector value
+def v2f32 : ValueType<64, 107>; // 2 x f32 vector value
+def v3f32 : ValueType<96, 108>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 109>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 110>; // 5 x f32 vector value
+def v6f32 : ValueType<192, 111>; // 6 x f32 vector value
+def v7f32 : ValueType<224, 112>; // 7 x f32 vector value
+def v8f32 : ValueType<256, 113>; // 8 x f32 vector value
+def v9f32 : ValueType<288, 114>; // 9 x f32 vector value
+def v10f32 : ValueType<320, 115>; // 10 x f32 vector value
+def v11f32 : ValueType<352, 116>; // 11 x f32 vector value
+def v12f32 : ValueType<384, 117>; // 12 x f32 vector value
+def v16f32 : ValueType<512, 118>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 119>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 120>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 121>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 122>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 123>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 124>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 125>; // 2048 x f32 vector value
+
+def v1f64 : ValueType<64, 126>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 127>; // 2 x f64 vector value
+def v3f64 : ValueType<192, 128>; // 3 x f64 vector value
+def v4f64 : ValueType<256, 129>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 130>; // 8 x f64 vector value
+def v16f64 : ValueType<1024, 131>; // 16 x f64 vector value
+def v32f64 : ValueType<2048, 132>; // 32 x f64 vector value
+def v64f64 : ValueType<4096, 133>; // 64 x f64 vector value
+def v128f64 : ValueType<8192, 134>; // 128 x f64 vector value
+def v256f64 : ValueType<16384, 135>; // 256 x f64 vector value
+
+def nxv1i1 : ValueType<1, 136>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 137>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 138>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 139>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 140>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 141>; // n x 32 x i1 vector value
+def nxv64i1 : ValueType<64, 142>; // n x 64 x i1 vector value
+
+def nxv1i8 : ValueType<8, 143>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 144>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 145>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 146>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 147>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 148>; // n x 32 x i8 vector value
+def nxv64i8 : ValueType<512, 149>; // n x 64 x i8 vector value
+
+def nxv1i16 : ValueType<16, 150>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 151>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 152>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 153>; // n x 8 x i16 vector value
+def nxv16i16 : ValueType<256, 154>; // n x 16 x i16 vector value
+def nxv32i16 : ValueType<512, 155>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 156>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 157>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 158>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 159>; // n x 8 x i32 vector value
+def nxv16i32 : ValueType<512, 160>; // n x 16 x i32 vector value
+def nxv32i32 : ValueType<1024, 161>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 162>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 163>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 164>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 165>; // n x 8 x i64 vector value
+def nxv16i64 : ValueType<1024, 166>; // n x 16 x i64 vector value
+def nxv32i64 : ValueType<2048, 167>; // n x 32 x i64 vector value
+
+def nxv1f16 : ValueType<16, 168>; // n x 1 x f16 vector value
+def nxv2f16 : ValueType<32, 169>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64, 170>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 171>; // n x 8 x f16 vector value
+def nxv16f16 : ValueType<256, 172>; // n x 16 x f16 vector value
+def nxv32f16 : ValueType<512, 173>; // n x 32 x f16 vector value
+
+def nxv1bf16 : ValueType<16, 174>; // n x 1 x bf16 vector value
+def nxv2bf16 : ValueType<32, 175>; // n x 2 x bf16 vector value
+def nxv4bf16 : ValueType<64, 176>; // n x 4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 177>; // n x 8 x bf16 vector value
+def nxv16bf16 : ValueType<256, 178>; // n x 16 x bf16 vector value
+def nxv32bf16 : ValueType<512, 179>; // n x 32 x bf16 vector value
+
+def nxv1f32 : ValueType<32, 180>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64, 181>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 182>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 183>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 184>; // n x 16 x f32 vector value
+
+def nxv1f64 : ValueType<64, 185>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 186>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 187>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 188>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64, 189>; // X86 MMX value
+def FlagVT : ValueType<0, 190>; // Pre-RA sched glue
+def isVoid : ValueType<0, 191>; // Produces no value
+def untyped : ValueType<8, 192>; // Produces an untyped value
+def funcref : ValueType<0, 193>; // WebAssembly's funcref type
+def externref : ValueType<0, 194>; // WebAssembly's externref type
+def x86amx : ValueType<8192, 195>; // X86 AMX value
+def i64x8 : ValueType<512, 196>; // 8 Consecutive GPRs (AArch64)
 def aarch64svcount
-    : ValueType<16, 196>; // AArch64 predicate-as-counter
-def spirvbuiltin : ValueType<0, 197>; // SPIR-V's builtin type
+    : ValueType<16, 197>; // AArch64 predicate-as-counter
+def spirvbuiltin : ValueType<0, 198>; // SPIR-V's builtin type

 def token : ValueType<0, 248>; // TokenTy
 def MetadataVT : ValueType<0, 249>; // Metadata
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -52,150 +52,151 @@
     FIRST_INTEGER_VALUETYPE = i1,
     LAST_INTEGER_VALUETYPE = i128,

-    bf16 = 10, // This is a 16 bit brain floating point value
-    f16 = 11, // This is a 16 bit floating point value
-    f32 = 12, // This is a 32 bit floating point value
-    f64 = 13, // This is a 64 bit floating point value
-    f80 = 14, // This is a 80 bit floating point value
-    f128 = 15, // This is a 128 bit floating point value
-    ppcf128 = 16, // This is a PPC 128-bit floating point value
-
-    FIRST_FP_VALUETYPE = bf16,
+    f8 = 10, // This is an 8 bit floating point value
+    bf16 = 11, // This is a 16 bit brain floating point value
+    f16 = 12, // This is a 16 bit floating point value
+    f32 = 13, // This is a 32 bit floating point value
+    f64 = 14, // This is a 64 bit floating point value
+    f80 = 15, // This is a 80 bit floating point value
+    f128 = 16, // This is a 128 bit floating point value
+    ppcf128 = 17, // This is a PPC 128-bit floating point value
+
+    FIRST_FP_VALUETYPE = f8,
     LAST_FP_VALUETYPE = ppcf128,

-    v1i1 = 17, // 1 x i1
-    v2i1 = 18, // 2 x i1
-    v4i1 = 19, // 4 x i1
-    v8i1 = 20, // 8 x i1
-    v16i1 = 21, // 16 x i1
-    v32i1 = 22, // 32 x i1
-    v64i1 = 23, // 64 x i1
-    v128i1 = 24, // 128 x i1
-    v256i1 = 25, // 256 x i1
-    v512i1 = 26, // 512 x i1
-    v1024i1 = 27, // 1024 x i1
-    v2048i1 = 28, // 2048 x i1
-
-    v128i2 = 29, // 128 x i2
-    v256i2 = 30, // 256 x i2
-
-    v64i4 = 31, // 64 x i4
-    v128i4 = 32, // 128 x i4
-
-    v1i8 = 33, // 1 x i8
-    v2i8 = 34, // 2 x i8
-    v4i8 = 35, // 4 x i8
-    v8i8 = 36, // 8 x i8
-    v16i8 = 37, // 16 x i8
-    v32i8 = 38, // 32 x i8
-    v64i8 = 39, // 64 x i8
-    v128i8 = 40, // 128 x i8
-    v256i8 = 41, // 256 x i8
-    v512i8 = 42, // 512 x i8
-    v1024i8 = 43, // 1024 x i8
-
-    v1i16 = 44, // 1 x i16
-    v2i16 = 45, // 2 x i16
-    v3i16 = 46, // 3 x i16
-    v4i16 = 47, // 4 x i16
-    v8i16 = 48, // 8 x i16
-    v16i16 = 49, // 16 x i16
-    v32i16 = 50, // 32 x i16
-    v64i16 = 51, // 64 x i16
-    v128i16 = 52, // 128 x i16
-    v256i16 = 53, // 256 x i16
-    v512i16 = 54, // 512 x i16
-
-    v1i32 = 55, // 1 x i32
-    v2i32 = 56, // 2 x i32
-    v3i32 = 57, // 3 x i32
-    v4i32 = 58, // 4 x i32
-    v5i32 = 59, // 5 x i32
-    v6i32 = 60, // 6 x i32
-    v7i32 = 61, // 7 x i32
-    v8i32 = 62, // 8 x i32
-    v9i32 = 63, // 9 x i32
-    v10i32 = 64, // 10 x i32
-    v11i32 = 65, // 11 x i32
-    v12i32 = 66, // 12 x i32
-    v16i32 = 67, // 16 x i32
-    v32i32 = 68, // 32 x i32
-    v64i32 = 69, // 64 x i32
-    v128i32 = 70, // 128 x i32
-    v256i32 = 71, // 256 x i32
-    v512i32 = 72, // 512 x i32
-    v1024i32 = 73, // 1024 x i32
-    v2048i32 = 74, // 2048 x i32
-
-    v1i64 = 75, // 1 x i64
-    v2i64 = 76, // 2 x i64
-    v3i64 = 77, // 3 x i64
-    v4i64 = 78, // 4 x i64
-    v8i64 = 79, // 8 x i64
-    v16i64 = 80, // 16 x i64
-    v32i64 = 81, // 32 x i64
-    v64i64 = 82, // 64 x i64
-    v128i64 = 83, // 128 x i64
-    v256i64 = 84, // 256 x i64
-
-    v1i128 = 85, // 1 x i128
+    v1i1 = 18, // 1 x i1
+    v2i1 = 19, // 2 x i1
+    v4i1 = 20, // 4 x i1
+    v8i1 = 21, // 8 x i1
+    v16i1 = 22, // 16 x i1
+    v32i1 = 23, // 32 x i1
+    v64i1 = 24, // 64 x i1
+    v128i1 = 25, // 128 x i1
+    v256i1 = 26, // 256 x i1
+    v512i1 = 27, // 512 x i1
+    v1024i1 = 28, // 1024 x i1
+    v2048i1 = 29, // 2048 x i1
+
+    v128i2 = 30, // 128 x i2
+    v256i2 = 31, // 256 x i2
+
+    v64i4 = 32, // 64 x i4
+    v128i4 = 33, // 128 x i4
+
+    v1i8 = 34, // 1 x i8
+    v2i8 = 35, // 2 x i8
+    v4i8 = 36, // 4 x i8
+    v8i8 = 37, // 8 x i8
+    v16i8 = 38, // 16 x i8
+    v32i8 = 39, // 32 x i8
+    v64i8 = 40, // 64 x i8
+    v128i8 = 41, // 128 x i8
+    v256i8 = 42, // 256 x i8
+    v512i8 = 43, // 512 x i8
+    v1024i8 = 44, // 1024 x i8
+
+    v1i16 = 45, // 1 x i16
+    v2i16 = 46, // 2 x i16
+    v3i16 = 47, // 3 x i16
+    v4i16 = 48, // 4 x i16
+    v8i16 = 49, // 8 x i16
+    v16i16 = 50, // 16 x i16
+    v32i16 = 51, // 32 x i16
+    v64i16 = 52, // 64 x i16
+    v128i16 = 53, // 128 x i16
+    v256i16 = 54, // 256 x i16
+    v512i16 = 55, // 512 x i16
+
+    v1i32 = 56, // 1 x i32
+    v2i32 = 57, // 2 x i32
+    v3i32 = 58, // 3 x i32
+    v4i32 = 59, // 4 x i32
+    v5i32 = 60, // 5 x i32
+    v6i32 = 61, // 6 x i32
+    v7i32 = 62, // 7 x i32
+    v8i32 = 63, // 8 x i32
+    v9i32 = 64, // 9 x i32
+    v10i32 = 65, // 10 x i32
+    v11i32 = 66, // 11 x i32
+    v12i32 = 67, // 12 x i32
+    v16i32 = 68, // 16 x i32
+    v32i32 = 69, // 32 x i32
+    v64i32 = 70, // 64 x i32
+    v128i32 = 71, // 128 x i32
+    v256i32 = 72, // 256 x i32
+    v512i32 = 73, // 512 x i32
+    v1024i32 = 74, // 1024 x i32
+    v2048i32 = 75, // 2048 x i32
+
+    v1i64 = 76, // 1 x i64
+    v2i64 = 77, // 2 x i64
+    v3i64 = 78, // 3 x i64
+    v4i64 = 79, // 4 x i64
+    v8i64 = 80, // 8 x i64
+    v16i64 = 81, // 16 x i64
+    v32i64 = 82, // 32 x i64
+    v64i64 = 83, // 64 x i64
+    v128i64 = 84, // 128 x i64
+    v256i64 = 85, // 256 x i64
+
+    v1i128 = 86, // 1 x i128

     FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
     LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,

-    v1f16 = 86, // 1 x f16
-    v2f16 = 87, // 2 x f16
-    v3f16 = 88, // 3 x f16
-    v4f16 = 89, // 4 x f16
-    v8f16 = 90, // 8 x f16
-    v16f16 = 91, // 16 x f16
-    v32f16 = 92, // 32 x f16
-    v64f16 = 93, // 64 x f16
-    v128f16 = 94, // 128 x f16
-    v256f16 = 95, // 256 x f16
-    v512f16 = 96, // 512 x f16
-
-    v2bf16 = 97, // 2 x bf16
-    v3bf16 = 98, // 3 x bf16
-    v4bf16 = 99, // 4 x bf16
-    v8bf16 = 100, // 8 x bf16
-    v16bf16 = 101, // 16 x bf16
-    v32bf16 = 102, // 32 x bf16
-    v64bf16 = 103, // 64 x bf16
-    v128bf16 = 104, // 128 x bf16
-
-    v1f32 = 105, // 1 x f32
-    v2f32 = 106, // 2 x f32
-    v3f32 = 107, // 3 x f32
-    v4f32 = 108, // 4 x f32
-    v5f32 = 109, // 5 x f32
-    v6f32 = 110, // 6 x f32
-    v7f32 = 111, // 7 x f32
-    v8f32 = 112, // 8 x f32
-    v9f32 = 113, // 9 x f32
-    v10f32 = 114, // 10 x f32
-    v11f32 = 115, // 11 x f32
-    v12f32 = 116, // 12 x f32
-    v16f32 = 117, // 16 x f32
-
-    v32f32 = 118, // 32 x f32
-    v64f32 = 119, // 64 x f32
-    v128f32 = 120, // 128 x f32
-    v256f32 = 121, // 256 x f32
-    v512f32 = 122, // 512 x f32
-    v1024f32 = 123, // 1024 x f32
-    v2048f32 = 124, // 2048 x f32
-
-    v1f64 = 125, // 1 x f64
-    v2f64 = 126, // 2 x f64
-    v3f64 = 127, // 3 x f64
-    v4f64 = 128, // 4 x f64
-    v8f64 = 129, // 8 x f64
-    v16f64 = 130, // 16 x f64
-    v32f64 = 131, // 32 x f64
-    v64f64 = 132, // 64 x f64
-    v128f64 = 133, // 128 x f64
-    v256f64 = 134, // 256 x f64
+    v1f16 = 87, // 1 x f16
+    v2f16 = 88, // 2 x f16
+    v3f16 = 89, // 3 x f16
+    v4f16 = 90, // 4 x f16
+    v8f16 = 91, // 8 x f16
+    v16f16 = 92, // 16 x f16
+    v32f16 = 93, // 32 x f16
+    v64f16 = 94, // 64 x f16
+    v128f16 = 95, // 128 x f16
+    v256f16 = 96, // 256 x f16
+    v512f16 = 97, // 512 x f16
+
+    v2bf16 = 98, // 2 x bf16
+    v3bf16 = 99, // 3 x bf16
+    v4bf16 = 100, // 4 x bf16
+    v8bf16 = 101, // 8 x bf16
+    v16bf16 = 102, // 16 x bf16
+    v32bf16 = 103, // 32 x bf16
+    v64bf16 = 104, // 64 x bf16
+    v128bf16 = 105, // 128 x bf16
+
+    v1f32 = 106, // 1 x f32
+    v2f32 = 107, // 2 x f32
+    v3f32 = 108, // 3 x f32
+    v4f32 = 109, // 4 x f32
+    v5f32 = 110, // 5 x f32
+    v6f32 = 111, // 6 x f32
+    v7f32 = 112, // 7 x f32
+    v8f32 = 113, // 8 x f32
+    v9f32 = 114, // 9 x f32
+    v10f32 = 115, // 10 x f32
+    v11f32 = 116, // 11 x f32
+    v12f32 = 117, // 12 x f32
+    v16f32 = 118, // 16 x f32
+
+    v32f32 = 119, // 32 x f32
+    v64f32 = 120, // 64 x f32
+    v128f32 = 121, // 128 x f32
+    v256f32 = 122, // 256 x f32
+    v512f32 = 123, // 512 x f32
+    v1024f32 = 124, // 1024 x f32
+    v2048f32 = 125, // 2048 x f32
+
+    v1f64 = 126, // 1 x f64
+    v2f64 = 127, // 2 x f64
+    v3f64 = 128, // 3 x f64
+    v4f64 = 129, // 4 x f64
+    v8f64 = 130, // 8 x f64
+    v16f64 = 131, // 16 x f64
+    v32f64 = 132, // 32 x f64
+    v64f64 = 133, // 64 x f64
+    v128f64 = 134, // 128 x f64
+    v256f64 = 135, // 256 x f64

     FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16,
     LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
@@ -203,70 +204,70 @@
     FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
     LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64,

-    nxv1i1 = 135, // n x 1 x i1
-    nxv2i1 = 136, // n x 2 x i1
-    nxv4i1 = 137, // n x 4 x i1
-    nxv8i1 = 138, // n x 8 x i1
-    nxv16i1 = 139, // n x 16 x i1
-    nxv32i1 = 140, // n x 32 x i1
-    nxv64i1 = 141, // n x 64 x i1
-
-    nxv1i8 = 142, // n x 1 x i8
-    nxv2i8 = 143, // n x 2 x i8
-    nxv4i8 = 144, // n x 4 x i8
-    nxv8i8 = 145, // n x 8 x i8
-    nxv16i8 = 146, // n x 16 x i8
-    nxv32i8 = 147, // n x 32 x i8
-    nxv64i8 = 148, // n x 64 x i8
-
-    nxv1i16 = 149, // n x 1 x i16
-    nxv2i16 = 150, // n x 2 x i16
-    nxv4i16 = 151, // n x 4 x i16
-    nxv8i16 = 152, // n x 8 x i16
-    nxv16i16 = 153, // n x 16 x i16
-    nxv32i16 = 154, // n x 32 x i16
-
-    nxv1i32 = 155, // n x 1 x i32
-    nxv2i32 = 156, // n x 2 x i32
-    nxv4i32 = 157, // n x 4 x i32
-    nxv8i32 = 158, // n x 8 x i32
-    nxv16i32 = 159, // n x 16 x i32
-    nxv32i32 = 160, // n x 32 x i32
-
-    nxv1i64 = 161, // n x 1 x i64
-    nxv2i64 = 162, // n x 2 x i64
-    nxv4i64 = 163, // n x 4 x i64
-    nxv8i64 = 164, // n x 8 x i64
-    nxv16i64 = 165, // n x 16 x i64
-    nxv32i64 = 166, // n x 32 x i64
+    nxv1i1 = 136, // n x 1 x i1
+    nxv2i1 = 137, // n x 2 x i1
+    nxv4i1 = 138, // n x 4 x i1
+    nxv8i1 = 139, // n x 8 x i1
+    nxv16i1 = 140, // n x 16 x i1
+    nxv32i1 = 141, // n x 32 x i1
+    nxv64i1 = 142, // n x 64 x i1
+
+    nxv1i8 = 143, // n x 1 x i8
+    nxv2i8 = 144, // n x 2 x i8
+    nxv4i8 = 145, // n x 4 x i8
+    nxv8i8 = 146, // n x 8 x i8
+    nxv16i8 = 147, // n x 16 x i8
+    nxv32i8 = 148, // n x 32 x i8
+    nxv64i8 = 149, // n x 64 x i8
+
+    nxv1i16 = 150, // n x 1 x i16
+    nxv2i16 = 151, // n x 2 x i16
+    nxv4i16 = 152, // n x 4 x i16
+    nxv8i16 = 153, // n x 8 x i16
+    nxv16i16 = 154, // n x 16 x i16
+    nxv32i16 = 155, // n x 32 x i16
+
+    nxv1i32 = 156, // n x 1 x i32
+    nxv2i32 = 157, // n x 2 x i32
+    nxv4i32 = 158, // n x 4 x i32
+    nxv8i32 = 159, // n x 8 x i32
+    nxv16i32 = 160, // n x 16 x i32
+    nxv32i32 = 161, // n x 32 x i32
+
+    nxv1i64 = 162, // n x 1 x i64
+    nxv2i64 = 163, // n x 2 x i64
+    nxv4i64 = 164, // n x 4 x i64
+    nxv8i64 = 165, // n x 8 x i64
+    nxv16i64 = 166, // n x 16 x i64
+    nxv32i64 = 167, // n x 32 x i64

     FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
     LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,

-    nxv1f16 = 167, // n x 1 x f16
-    nxv2f16 = 168, // n x 2 x f16
-    nxv4f16 = 169, // n x 4 x f16
-    nxv8f16 = 170, // n x 8 x f16
-    nxv16f16 = 171, // n x 16 x f16
-    nxv32f16 = 172, // n x 32 x f16
-
-    nxv1bf16 = 173, // n x 1 x bf16
-    nxv2bf16 = 174, // n x 2 x bf16
-    nxv4bf16 = 175, // n x 4 x bf16
-    nxv8bf16 = 176, // n x 8 x bf16
-    nxv16bf16 = 177, // n x 16 x bf16
-    nxv32bf16 = 178, // n x 32 x bf16
-
-    nxv1f32 = 179, // n x 1 x f32
-    nxv2f32 = 180, // n x 2 x f32
-    nxv4f32 = 181, // n x 4 x f32
-    nxv8f32 = 182, // n x 8 x f32
-    nxv16f32 = 183, // n x 16 x f32
-
-    nxv1f64 = 184, // n x 1 x f64
-    nxv2f64 = 185, // n x 2 x f64
-    nxv4f64 = 186, // n x 4 x f64
-    nxv8f64 = 187, // n x 8 x f64
+    nxv1f16 = 168, // n x 1 x f16
+    nxv2f16 = 169, // n x 2 x f16
+    nxv4f16 = 170, // n x 4 x f16
+    nxv8f16 = 171, // n x 8 x f16
+    nxv16f16 = 172, // n x 16 x f16
+    nxv32f16 = 173, // n x 32 x f16
+
+    nxv1bf16 = 174, // n x 1 x bf16
+    nxv2bf16 = 175, // n x 2 x bf16
+    nxv4bf16 = 176, // n x 4 x bf16
+    nxv8bf16 = 177, // n x 8 x bf16
+    nxv16bf16 = 178, // n x 16 x bf16
+    nxv32bf16 = 179, // n x 32 x bf16
+
+    nxv1f32 = 180, // n x 1 x f32
+    nxv2f32 = 181, // n x 2 x f32
+    nxv4f32 = 182, // n x 4 x f32
+    nxv8f32 = 183, // n x 8 x f32
+    nxv16f32 = 184, // n x 16 x f32
+
+    nxv1f64 = 185, // n x 1 x f64
+    nxv2f64 = 186, // n x 2 x f64
+    nxv4f64 = 187, // n x 4 x f64
+    nxv8f64 = 188, // n x 8 x f64

     FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16,
     LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
@@ -277,22 +278,22 @@
     FIRST_VECTOR_VALUETYPE = v1i1,
     LAST_VECTOR_VALUETYPE = nxv8f64,

-    x86mmx = 188, // This is an X86 MMX value
+    x86mmx = 189, // This is an X86 MMX value

-    Glue = 189, // This glues nodes together during pre-RA sched
+    Glue = 190, // This glues nodes together during pre-RA sched

-    isVoid = 190, // This has no value
+    isVoid = 191, // This has no value

-    Untyped = 191, // This value takes a register, but has
+    Untyped = 192, // This value takes a register, but has
                    // unspecified type. The register class
                    // will be determined by the opcode.
-    funcref = 193, // WebAssembly's funcref type
-    externref = 193, // WebAssembly's externref type
-    x86amx = 194, // This is an X86 AMX value
-    i64x8 = 195, // 8 Consecutive GPRs (AArch64)
-    aarch64svcount = 196, // AArch64 predicate-as-counter
-    spirvbuiltin = 197, // SPIR-V's builtin type
+    funcref = 193, // WebAssembly's funcref type
+    externref = 194, // WebAssembly's externref type
+    x86amx = 195, // This is an X86 AMX value
+    i64x8 = 196, // 8 Consecutive GPRs (AArch64)
+    aarch64svcount = 197, // AArch64 predicate-as-counter
+    spirvbuiltin = 198, // SPIR-V's builtin type

     FIRST_VALUETYPE = 1, // This is always the beginning of the list.
     LAST_VALUETYPE = spirvbuiltin, // This always remains at the end of the list.
@@ -963,6 +964,7 @@
     case v4i1: return TypeSize::Fixed(4);
     case nxv4i1: return TypeSize::Scalable(4);
     case i8 :
+    case f8:
     case v1i8:
     case v8i1: return TypeSize::Fixed(8);
     case nxv1i8:
@@ -1245,6 +1247,8 @@
     switch (BitWidth) {
     default:
       llvm_unreachable("Bad bit width!");
+    case 8:
+      return MVT::f8;
     case 16:
       return MVT::f16;
     case 32:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -302,3 +302,108 @@
           (STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
 def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
           (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
+
+multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
+  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+            (i32 (SMOVvi8to32
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+              (i64 0)))>;
+  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+            (i32 (SMOVvi8to32
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+              (i64 0)))>;
+
+  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+            (i32 (SMOVvi16to32
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+              (i64 0)))>;
+  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+            (i32 (SMOVvi16to32
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+              (i64 0)))>;
+
+  def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+            (i32 (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+              ssub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
+  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+            (i32 (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+              ssub))>;
+  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+            (i32 (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+              ssub))>;
+
+  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+            (i32 (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+              ssub))>;
+  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+            (i32 (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+              ssub))>;
+
+  def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+            (i32 (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+              ssub))>;
+}
+
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"ADDV", int_aarch64_neon_saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
+          (i32 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
+            ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;
+def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
+          (i32 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
+            ssub))>;
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
+def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
+          (i32 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (SMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
+            ssub))>;
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMINV", int_aarch64_neon_sminv>;
+def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
+          (i32 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (SMINPv2i32 V64:$Rn, V64:$Rn), dsub),
+            ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMAXV", int_aarch64_neon_umaxv>;
+def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
+          (i32 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (UMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
+            ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMINV", int_aarch64_neon_uminv>;
+def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
+          (i32 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (UMINPv2i32 V64:$Rn, V64:$Rn), dsub),
+            ssub))>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2804,7 +2804,7 @@
 defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

 // Floating-point
-defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>;
+defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", f8, load>;
 defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
 defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
 defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
@@ -3569,7 +3569,7 @@

 // Floating-point
-defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", f8, store>;
 defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
 defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
 defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
@@ -3979,7 +3979,7 @@
 // (immediate pre-indexed)
 def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
 def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
-def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
+def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, f8>;
 def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
 def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
 def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
@@ -4033,7 +4033,7 @@
 // (immediate post-indexed)
 def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
 def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
-def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
+def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, f8>;
 def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
 def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
 def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -435,7 +435,7 @@
   def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
 }

-def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+def FPR8 : RegisterClass<"AArch64", [f8], 8, (sequence "B%u", 0, 31)> {
   let Size = 8;
 }
 def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -481,14 +481,35 @@
                          getValueMapping(RBIdx, Size), NumOperands);
 }

-/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
-static bool isFPIntrinsic(unsigned ID) {
+/// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+                          const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC);
   // TODO: Add more intrinsics.
-  switch (ID) {
+  switch (MI.getIntrinsicID()) {
   default:
     return false;
   case Intrinsic::aarch64_neon_uaddlv:
+  case Intrinsic::aarch64_neon_uaddv:
+  case Intrinsic::aarch64_neon_umaxv:
+  case Intrinsic::aarch64_neon_uminv:
+  case Intrinsic::aarch64_neon_fmaxv:
+  case Intrinsic::aarch64_neon_fminv:
+  case Intrinsic::aarch64_neon_fmaxnmv:
+  case Intrinsic::aarch64_neon_fminnmv:
     return true;
+  case Intrinsic::aarch64_neon_saddlv: {
+    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    return SrcTy.getElementType().getSizeInBits() >= 16 &&
+           SrcTy.getElementCount().getFixedValue() >= 4;
+  }
+  case Intrinsic::aarch64_neon_saddv:
+  case Intrinsic::aarch64_neon_smaxv:
+  case Intrinsic::aarch64_neon_sminv: {
+    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    return SrcTy.getElementType().getSizeInBits() >= 32 &&
+           SrcTy.getElementCount().getFixedValue() >= 2;
+  }
   }
 }
@@ -497,7 +518,7 @@
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
   unsigned Op = MI.getOpcode();
-  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
     return true;

   // Do we have an explicit floating point instruction?
@@ -968,9 +989,8 @@
   case TargetOpcode::G_INTRINSIC: {
     // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
-    unsigned ID = MI.getIntrinsicID();
     unsigned Idx = 0;
-    if (!isFPIntrinsic(ID))
+    if (!isFPIntrinsic(MRI, MI))
       break;
     for (const auto &Op : MI.explicit_operands()) {
       if (Op.isReg())
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s -check-prefixes=CHECK,SDAG
+; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=aarch64-eabi -aarch64-neon-syntax=generic 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL

 ; Function Attrs: nounwind readnone
 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
@@ -9,6 +10,14 @@
 declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

+; GISEL-NOT: Instruction selection used fallback path for add_B
+; GISEL-NOT: Instruction selection used fallback path for add_H
+; GISEL-NOT: Instruction selection used fallback path for add_S
+; GISEL-NOT: Instruction selection used fallback path for add_D
+; GISEL-NOT: Instruction selection used fallback path for oversized_ADDV_512
+; GISEL-NOT: Instruction selection used fallback path for addv_combine_i32
+; GISEL-NOT: Instruction selection used fallback path for addv_combine_i64
+
 define i8 @add_B(ptr %arr) {
 ; CHECK-LABEL: add_B:
 ; CHECK: // %bb.0:
@@ -84,16 +93,27 @@
 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

 define i32 @oversized_ADDV_512(ptr %arr) {
-; CHECK-LABEL: oversized_ADDV_512:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: oversized_ADDV_512:
+; SDAG: // %bb.0:
+; SDAG-NEXT: ldp q0, q1, [x0, #32]
+; SDAG-NEXT: ldp q3, q2, [x0]
+; SDAG-NEXT: add v0.4s, v3.4s, v0.4s
+; SDAG-NEXT: add v1.4s, v2.4s, v1.4s
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: oversized_ADDV_512:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldp q0, q1, [x0]
+; GISEL-NEXT: ldp q2, q3, [x0, #32]
+; GISEL-NEXT: add v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: add v1.4s, v2.4s, v3.4s
+; GISEL-NEXT: add v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: fmov w0, s0
+; GISEL-NEXT: ret
  %bin.rdx = load <16 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
  ret i32 %r
@@ -128,12 +148,21 @@
 }

 define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: addv_combine_i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i32:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i32:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: addv s1, v1.4s
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: add w0, w8, w9
+; GISEL-NEXT: ret
 entry:
  %rdx.1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a1)
  %rdx.2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2)
@@ -142,12 +171,21 @@
 }

 define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
-; CHECK-LABEL: addv_combine_i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i64:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
+; SDAG-NEXT: addp d0, v0.2d
+; SDAG-NEXT: fmov x0, d0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i64:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addp d0, v0.2d
+; GISEL-NEXT: addp d1, v1.2d
+; GISEL-NEXT: fmov x8, d0
+; GISEL-NEXT: fmov x9, d1
+; GISEL-NEXT: add x0, x8, x9
+; GISEL-NEXT: ret
 entry:
  %rdx.1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1)
  %rdx.2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a2)
diff --git a/llvm/test/CodeGen/AArch64/arm64-fminv.ll b/llvm/test/CodeGen/AArch64/arm64-fminv.ll
--- a/llvm/test/CodeGen/AArch64/arm64-fminv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fminv.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=arm64-linux-gnu -o - %s | FileCheck %s

 define float @test_fminv_v2f32(<2 x float> %in) {
 ; CHECK: test_fminv_v2f32:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-across.ll b/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -global-isel=1 -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

 declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

 declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>)
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -71,6 +71,7 @@
   case MVT::iAny: return "MVT::iAny";
   case MVT::fAny: return "MVT::fAny";
   case MVT::vAny: return "MVT::vAny";
+  case MVT::f8: return "MVT::f8";
   case MVT::f16: return "MVT::f16";
   case MVT::bf16: return "MVT::bf16";
   case MVT::f32: return "MVT::f32";