diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -65,139 +65,147 @@ def v32i16 : ValueType<512, 41>; // 32 x i16 vector value def v64i16 : ValueType<1024, 42>; // 64 x i16 vector value def v128i16 : ValueType<2048, 43>; // 128 x i16 vector value - -def v1i32 : ValueType<32, 44>; // 1 x i32 vector value -def v2i32 : ValueType<64, 45>; // 2 x i32 vector value -def v3i32 : ValueType<96, 46>; // 3 x i32 vector value -def v4i32 : ValueType<128, 47>; // 4 x i32 vector value -def v5i32 : ValueType<160, 48>; // 5 x i32 vector value -def v8i32 : ValueType<256, 49>; // 8 x i32 vector value -def v16i32 : ValueType<512, 50>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 51>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 52>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 53>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 54>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 55>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 56>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 57>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 58>; // 1 x i64 vector value -def v2i64 : ValueType<128, 59>; // 2 x i64 vector value -def v4i64 : ValueType<256, 60>; // 4 x i64 vector value -def v8i64 : ValueType<512, 61>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 62>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 63>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 64>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 65>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 66>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 67>; // 1 x i128 vector value - -def v1f16 : ValueType<16, 68>; // 1 x f16 vector value -def v2f16 : ValueType<32, 69>; // 2 x f16 vector value -def v3f16 : ValueType<48, 70>; // 3 x f16 vector value -def v4f16 : ValueType<64, 71>; // 4 x f16 vector value -def v8f16 : ValueType<128, 72>; // 8 x f16 vector value -def v16f16 : ValueType<256, 73>; // 16 x f16 vector value -def v32f16 : ValueType<512, 74>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 75>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 76>; // 128 x f16 vector value -def v2bf16 : ValueType<32, 77>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 78>; // 3 x bf16 vector value -def v4bf16 : ValueType<64, 79>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 80>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 81>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 82>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 83>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 84>; // 128 x bf16 vector value -def v1f32 : ValueType<32, 85>; // 1 x f32 vector value -def v2f32 : ValueType<64, 86>; // 2 x f32 vector value -def v3f32 : ValueType<96, 87>; // 3 x f32 vector value -def v4f32 : ValueType<128, 88>; // 4 x f32 vector value -def v5f32 : ValueType<160, 89>; // 5 x f32 vector value -def v8f32 : ValueType<256, 90>; // 8 x f32 vector value -def v16f32 : ValueType<512, 91>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 92>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 93>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 94>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 95>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 96>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 97>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 98>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 99>; // 1 x f64 vector value -def v2f64 : ValueType<128, 100>; // 2 x f64 vector value -def v4f64 : ValueType<256, 101>; // 4 x f64 vector value -def v8f64 : ValueType<512, 102>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 103>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 104>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 105>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 106>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 107>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 108>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 109>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 110>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 111>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 112>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 113>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 114>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 115>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 116>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 117>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 118>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 119>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 120>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 121>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 122>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 123>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 124>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 125>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 126>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 127>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 128>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 129>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 130>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 131>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 132>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 133>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 134>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 135>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 136>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 137>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 138>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 139>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 140>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 141>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 142>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 143>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 144>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 145>; // n x 32 x f16 vector value -def nxv1bf16 : ValueType<16, 146>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 147>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 148>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 149>; // n x 8 x bf16 vector value -def nxv1f32 : ValueType<32, 150>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 151>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 152>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 153>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 154>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 155>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 156>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 157>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 158>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 159>; // X86 MMX value -def FlagVT : ValueType<0, 160>; // Pre-RA sched glue -def isVoid : ValueType<0, 161>; // Produces no value -def untyped : ValueType<8, 162>; // Produces an untyped value -def funcref : ValueType<0, 163>; // WebAssembly's funcref type -def externref : ValueType<0, 164>; // WebAssembly's externref type -def x86amx : ValueType<8192, 165>; // X86 AMX value +def v256i16 : ValueType<4096, 44>; // 256 x i16 vector value + +def v1i32 : ValueType<32, 45>; // 1 x i32 vector value +def v2i32 : ValueType<64, 46>; // 2 x i32 vector value +def v3i32 : ValueType<96, 47>; // 3 x i32 vector value +def v4i32 : ValueType<128, 48>; // 4 x i32 vector value +def v5i32 : ValueType<160, 49>; // 5 x i32 vector value +def v8i32 : ValueType<256, 50>; // 8 x i32 vector value +def v16i32 : ValueType<512, 51>; // 16 x i32 vector value +def v32i32 : ValueType<1024, 52>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 53>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 54>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 55>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 56>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 57>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 58>; // 2048 x i32 vector value + +def v1i64 : ValueType<64, 59>; // 1 x i64 vector value +def v2i64 : ValueType<128, 60>; // 2 x i64 vector value +def v4i64 : ValueType<256, 61>; // 4 x i64 vector value +def v8i64 : ValueType<512, 62>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 63>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 64>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 65>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 66>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 67>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 68>; // 1 x i128 vector value + +def v1f16 : ValueType<16, 69>; // 1 x f16 vector value +def v2f16 : ValueType<32, 70>; // 2 x f16 vector value +def v3f16 : ValueType<48, 71>; // 3 x f16 vector value +def v4f16 : ValueType<64, 72>; // 4 x f16 vector value +def v8f16 : ValueType<128, 73>; // 8 x f16 vector value +def v16f16 : ValueType<256, 74>; // 16 x f16 vector value +def v32f16 : ValueType<512, 75>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 76>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 77>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 78>; // 256 x f16 vector value + +def v2bf16 : ValueType<32, 79>; // 2 x bf16 vector value +def v3bf16 : ValueType<48, 80>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 81>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 82>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 83>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 84>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 85>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 86>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 87>; // 1 x f32 vector value +def v2f32 : ValueType<64, 88>; // 2 x f32 vector value +def v3f32 : ValueType<96, 89>; // 3 x f32 vector value +def v4f32 : ValueType<128, 90>; // 4 x f32 vector value +def v5f32 : ValueType<160, 91>; // 5 x f32 vector value +def v8f32 : ValueType<256, 92>; // 8 x f32 vector value +def v16f32 : ValueType<512, 93>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 94>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 95>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 96>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 97>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 98>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 99>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 100>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 101>; // 1 x f64 vector value +def v2f64 : ValueType<128, 102>; // 2 x f64 vector value +def v4f64 : ValueType<256, 103>; // 4 x f64 vector value +def v8f64 : ValueType<512, 104>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 105>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 106>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 107>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 108>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 109>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 110>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 111>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 112>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 113>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 114>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 115>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 116>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 117>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 118>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 119>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 120>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 121>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 122>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 123>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 124>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 125>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 126>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 127>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 128>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 129>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 130>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 131>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 132>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 133>; // n x 8 x i32 vector value +def nxv16i32 : ValueType<512, 134>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 135>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 136>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 137>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 138>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 139>; // n x 8 x i64 vector value +def nxv16i64 : ValueType<1024, 140>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 141>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<16, 142>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 143>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 144>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 145>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 146>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 147>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 148>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 149>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 150>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 151>; // n x 8 x bf16 vector value + +def nxv1f32 : ValueType<32, 152>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 153>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 154>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 155>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 156>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 157>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 158>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 159>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 160>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 161>; // X86 MMX value +def FlagVT : ValueType<0, 162>; // Pre-RA sched glue +def isVoid : ValueType<0, 163>; // Produces no value +def untyped : ValueType<8, 164>; // Produces an untyped value +def funcref : ValueType<0, 165>; // WebAssembly's funcref type +def externref : ValueType<0, 166>; // WebAssembly's externref type +def x86amx : ValueType<8192, 167>; // X86 AMX value def token : ValueType<0, 248>; // TokenTy diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -89,80 +89,82 @@ v32i16 = 41, // 32 x i16 v64i16 = 42, // 64 x i16 v128i16 = 43, // 128 x i16 - - v1i32 = 44, // 1 x i32 - v2i32 = 45, // 2 x i32 - v3i32 = 46, // 3 x i32 - v4i32 = 47, // 4 x i32 - v5i32 = 48, // 5 x i32 - v8i32 = 49, // 8 x i32 - v16i32 = 50, // 16 x i32 - v32i32 = 51, // 32 x i32 - v64i32 = 52, // 64 x i32 - v128i32 = 53, // 128 x i32 - v256i32 = 54, // 256 x i32 - v512i32 = 55, // 512 x i32 - v1024i32 = 56, // 1024 x i32 - v2048i32 = 57, // 2048 x i32 - - v1i64 = 58, // 1 x i64 - v2i64 = 59, // 2 x i64 - v4i64 = 60, // 4 x i64 - v8i64 = 61, // 8 x i64 - v16i64 = 62, // 16 x i64 - v32i64 = 63, // 32 x i64 - v64i64 = 64, // 64 x i64 - v128i64 = 65, // 128 x i64 - v256i64 = 66, // 256 x i64 - - v1i128 = 67, // 1 x i128 + v256i16 = 44, // 256 x i16 + + v1i32 = 45, // 1 x i32 + v2i32 = 46, // 2 x i32 + v3i32 = 47, // 3 x i32 + v4i32 = 48, // 4 x i32 + v5i32 = 49, // 5 x i32 + v8i32 = 50, // 8 x i32 + v16i32 = 51, // 16 x i32 + v32i32 = 52, // 32 x i32 + v64i32 = 53, // 64 x i32 + v128i32 = 54, // 128 x i32 + v256i32 = 55, // 256 x i32 + v512i32 = 56, // 512 x i32 + v1024i32 = 57, // 1024 x i32 + v2048i32 = 58, // 2048 x i32 + + v1i64 = 59, // 1 x i64 + v2i64 = 60, // 2 x i64 + v4i64 = 61, // 4 x i64 + v8i64 = 62, // 8 x i64 + v16i64 = 63, // 16 x i64 + v32i64 = 64, // 32 x i64 + v64i64 = 65, // 64 x i64 + v128i64 = 66, // 128 x i64 + v256i64 = 67, // 256 x i64 + + v1i128 = 68, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v1f16 = 68, // 1 x f16 - v2f16 = 69, // 2 x f16 - v3f16 = 70, // 3 x f16 - v4f16 = 71, // 4 x f16 - v8f16 = 72, // 8 x f16 - v16f16 = 73, // 16 x f16 - v32f16 = 74, // 32 x f16 - v64f16 = 75, // 64 x f16 - v128f16 = 76, // 128 x f16 - - v2bf16 = 77, // 2 x bf16 - v3bf16 = 78, // 3 x bf16 - v4bf16 = 79, // 4 x bf16 - v8bf16 = 80, // 8 x bf16 - v16bf16 = 81, // 16 x bf16 - v32bf16 = 82, // 32 x bf16 - v64bf16 = 83, // 64 x bf16 - v128bf16 = 84, // 128 x bf16 - - v1f32 = 85, // 1 x f32 - v2f32 = 86, // 2 x f32 - v3f32 = 87, // 3 x f32 - v4f32 = 88, // 4 x f32 - v5f32 = 89, // 5 x f32 - v8f32 = 90, // 8 x f32 - v16f32 = 91, // 16 x f32 - v32f32 = 92, // 32 x f32 - v64f32 = 93, // 64 x f32 - v128f32 = 94, // 128 x f32 - v256f32 = 95, // 256 x f32 - v512f32 = 96, // 512 x f32 - v1024f32 = 97, // 1024 x f32 - v2048f32 = 98, // 2048 x f32 - - v1f64 = 99, // 1 x f64 - v2f64 = 100, // 2 x f64 - v4f64 = 101, // 4 x f64 - v8f64 = 102, // 8 x f64 - v16f64 = 103, // 16 x f64 - v32f64 = 104, // 32 x f64 - v64f64 = 105, // 64 x f64 - v128f64 = 106, // 128 x f64 - v256f64 = 107, // 256 x f64 + v1f16 = 69, // 1 x f16 + v2f16 = 70, // 2 x f16 + v3f16 = 71, // 3 x f16 + v4f16 = 72, // 4 x f16 + v8f16 = 73, // 8 x f16 + v16f16 = 74, // 16 x f16 + v32f16 = 75, // 32 x f16 + v64f16 = 76, // 64 x f16 + v128f16 = 77, // 128 x f16 + v256f16 = 78, // 256 x f16 + + v2bf16 = 79, // 2 x bf16 + v3bf16 = 80, // 3 x bf16 + v4bf16 = 81, // 4 x bf16 + v8bf16 = 82, // 8 x bf16 + v16bf16 = 83, // 16 x bf16 + v32bf16 = 84, // 32 x bf16 + v64bf16 = 85, // 64 x bf16 + v128bf16 = 86, // 128 x bf16 + + v1f32 = 87, // 1 x f32 + v2f32 = 88, // 2 x f32 + v3f32 = 89, // 3 x f32 + v4f32 = 90, // 4 x f32 + v5f32 = 91, // 5 x f32 + v8f32 = 92, // 8 x f32 + v16f32 = 93, // 16 x f32 + v32f32 = 94, // 32 x f32 + v64f32 = 95, // 64 x f32 + v128f32 = 96, // 128 x f32 + v256f32 = 97, // 256 x f32 + v512f32 = 98, // 512 x f32 + v1024f32 = 99, // 1024 x f32 + v2048f32 = 100, // 2048 x f32 + + v1f64 = 101, // 1 x f64 + v2f64 = 102, // 2 x f64 + v4f64 = 103, // 4 x f64 + v8f64 = 104, // 8 x f64 + v16f64 = 105, // 16 x f64 + v32f64 = 106, // 32 x f64 + v64f64 = 107, // 64 x f64 + v128f64 = 108, // 128 x f64 + v256f64 = 109, // 256 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16, LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, @@ -170,68 +172,68 @@ FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, - nxv1i1 = 108, // n x 1 x i1 - nxv2i1 = 109, // n x 2 x i1 - nxv4i1 = 110, // n x 4 x i1 - nxv8i1 = 111, // n x 8 x i1 - nxv16i1 = 112, // n x 16 x i1 - nxv32i1 = 113, // n x 32 x i1 - nxv64i1 = 114, // n x 64 x i1 - - nxv1i8 = 115, // n x 1 x i8 - nxv2i8 = 116, // n x 2 x i8 - nxv4i8 = 117, // n x 4 x i8 - nxv8i8 = 118, // n x 8 x i8 - nxv16i8 = 119, // n x 16 x i8 - nxv32i8 = 120, // n x 32 x i8 - nxv64i8 = 121, // n x 64 x i8 - - nxv1i16 = 122, // n x 1 x i16 - nxv2i16 = 123, // n x 2 x i16 - nxv4i16 = 124, // n x 4 x i16 - nxv8i16 = 125, // n x 8 x i16 - nxv16i16 = 126, // n x 16 x i16 - nxv32i16 = 127, // n x 32 x i16 - - nxv1i32 = 128, // n x 1 x i32 - nxv2i32 = 129, // n x 2 x i32 - nxv4i32 = 130, // n x 4 x i32 - nxv8i32 = 131, // n x 8 x i32 - nxv16i32 = 132, // n x 16 x i32 - nxv32i32 = 133, // n x 32 x i32 - - nxv1i64 = 134, // n x 1 x i64 - nxv2i64 = 135, // n x 2 x i64 - nxv4i64 = 136, // n x 4 x i64 - nxv8i64 = 137, // n x 8 x i64 - nxv16i64 = 138, // n x 16 x i64 - nxv32i64 = 139, // n x 32 x i64 + nxv1i1 = 110, // n x 1 x i1 + nxv2i1 = 111, // n x 2 x i1 + nxv4i1 = 112, // n x 4 x i1 + nxv8i1 = 113, // n x 8 x i1 + nxv16i1 = 114, // n x 16 x i1 + nxv32i1 = 115, // n x 32 x i1 + nxv64i1 = 116, // n x 64 x i1 + + nxv1i8 = 117, // n x 1 x i8 + nxv2i8 = 118, // n x 2 x i8 + nxv4i8 = 119, // n x 4 x i8 + nxv8i8 = 120, // n x 8 x i8 + nxv16i8 = 121, // n x 16 x i8 + nxv32i8 = 122, // n x 32 x i8 + nxv64i8 = 123, // n x 64 x i8 + + nxv1i16 = 124, // n x 1 x i16 + nxv2i16 = 125, // n x 2 x i16 + nxv4i16 = 126, // n x 4 x i16 + nxv8i16 = 127, // n x 8 x i16 + nxv16i16 = 128, // n x 16 x i16 + nxv32i16 = 129, // n x 32 x i16 + + nxv1i32 = 130, // n x 1 x i32 + nxv2i32 = 131, // n x 2 x i32 + nxv4i32 = 132, // n x 4 x i32 + nxv8i32 = 133, // n x 8 x i32 + nxv16i32 = 134, // n x 16 x i32 + nxv32i32 = 135, // n x 32 x i32 + + nxv1i64 = 136, // n x 1 x i64 + nxv2i64 = 137, // n x 2 x i64 + nxv4i64 = 138, // n x 4 x i64 + nxv8i64 = 139, // n x 8 x i64 + nxv16i64 = 140, // n x 16 x i64 + nxv32i64 = 141, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 140, // n x 1 x f16 - nxv2f16 = 141, // n x 2 x f16 - nxv4f16 = 142, // n x 4 x f16 - nxv8f16 = 143, // n x 8 x f16 - nxv16f16 = 144, // n x 16 x f16 - nxv32f16 = 145, // n x 32 x f16 - - nxv1bf16 = 146, // n x 1 x bf16 - nxv2bf16 = 147, // n x 2 x bf16 - nxv4bf16 = 148, // n x 4 x bf16 - nxv8bf16 = 149, // n x 8 x bf16 - - nxv1f32 = 150, // n x 1 x f32 - nxv2f32 = 151, // n x 2 x f32 - nxv4f32 = 152, // n x 4 x f32 - nxv8f32 = 153, // n x 8 x f32 - nxv16f32 = 154, // n x 16 x f32 - - nxv1f64 = 155, // n x 1 x f64 - nxv2f64 = 156, // n x 2 x f64 - nxv4f64 = 157, // n x 4 x f64 - nxv8f64 = 158, // n x 8 x f64 + nxv1f16 = 142, // n x 1 x f16 + nxv2f16 = 143, // n x 2 x f16 + nxv4f16 = 144, // n x 4 x f16 + nxv8f16 = 145, // n x 8 x f16 + nxv16f16 = 146, // n x 16 x f16 + nxv32f16 = 147, // n x 32 x f16 + + nxv1bf16 = 148, // n x 1 x bf16 + nxv2bf16 = 149, // n x 2 x bf16 + nxv4bf16 = 150, // n x 4 x bf16 + nxv8bf16 = 151, // n x 8 x bf16 + + nxv1f32 = 152, // n x 1 x f32 + nxv2f32 = 153, // n x 2 x f32 + nxv4f32 = 154, // n x 4 x f32 + nxv8f32 = 155, // n x 8 x f32 + nxv16f32 = 156, // n x 16 x f32 + + nxv1f64 = 157, // n x 1 x f64 + nxv2f64 = 158, // n x 2 x f64 + nxv4f64 = 159, // n x 4 x f64 + nxv8f64 = 160, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -242,22 +244,22 @@ FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 159, // This is an X86 MMX value + x86mmx = 161, // This is an X86 MMX value - Glue = 160, // This glues nodes together during pre-RA sched + Glue = 162, // This glues nodes together during pre-RA sched - isVoid = 161, // This has no value + isVoid = 163, // This has no value - Untyped = 162, // This value takes a register, but has + Untyped = 164, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - funcref = 163, // WebAssembly's funcref type - externref = 164, // WebAssembly's externref type - x86amx = 165, // This is an X86 AMX value + funcref = 165, // WebAssembly's funcref type + externref = 166, // WebAssembly's externref type + x86amx = 167, // This is an X86 AMX value FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 166, // This always remains at the end of the list. + LAST_VALUETYPE = 168, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -543,6 +545,7 @@ case v32i16: case v64i16: case v128i16: + case v256i16: case nxv1i16: case nxv2i16: case nxv4i16: @@ -594,6 +597,7 @@ case v32f16: case v64f16: case v128f16: + case v256f16: case nxv1f16: case nxv2f16: case nxv4f16: @@ -661,6 +665,8 @@ case v512f32: return 512; case v256i1: case v256i8: + case v256i16: + case v256f16: case v256i32: case v256i64: case v256f32: @@ -973,8 +979,10 @@ case v64f32: case v32f64: return TypeSize::Fixed(2048); case nxv32i64: return TypeSize::Scalable(2048); + case v256i16: case v128i32: case v64i64: + case v256f16: case v128f32: case v64f64: return TypeSize::Fixed(4096); case v256i32: @@ -1154,6 +1162,7 @@ if (NumElements == 32) return MVT::v32i16; if (NumElements == 64) return MVT::v64i16; if (NumElements == 128) return MVT::v128i16; + if (NumElements == 256) return MVT::v256i16; break; case MVT::i32: if (NumElements == 1) return MVT::v1i32; @@ -1195,6 +1204,7 @@ if (NumElements == 32) return MVT::v32f16; if (NumElements == 64) return MVT::v64f16; if (NumElements == 128) return MVT::v128f16; + if (NumElements == 256) return MVT::v256f16; break; case MVT::bf16: if (NumElements == 2) return MVT::v2bf16; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -257,6 +257,8 @@ return FixedVectorType::get(Type::getInt16Ty(Context), 64); case MVT::v128i16: return FixedVectorType::get(Type::getInt16Ty(Context), 128); + case MVT::v256i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 256); case MVT::v1i32: return FixedVectorType::get(Type::getInt32Ty(Context), 1); case MVT::v2i32: @@ -323,6 +325,8 @@ return FixedVectorType::get(Type::getHalfTy(Context), 64); case MVT::v128f16: return FixedVectorType::get(Type::getHalfTy(Context), 128); + case MVT::v256f16: + return FixedVectorType::get(Type::getHalfTy(Context), 256); case MVT::v2bf16: return FixedVectorType::get(Type::getBFloatTy(Context), 2); case MVT::v3bf16: diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=RV64-1024 + +define void @interleave256(<256 x i16>* %agg.result, <128 x i16>* %0, <128 x i16>* %1) { +; RV64-1024-LABEL: interleave256: +; RV64-1024: # %bb.0: # %entry +; RV64-1024-NEXT: addi a3, zero, 128 +; RV64-1024-NEXT: vsetvli a4, a3, e16,m2,ta,mu +; RV64-1024-NEXT: vle16.v v12, (a1) +; RV64-1024-NEXT: vle16.v v8, (a2) +; RV64-1024-NEXT: addi a1, zero, 256 +; RV64-1024-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; RV64-1024-NEXT: vmv.v.i v28, 0 +; RV64-1024-NEXT: vsetvli a2, a3, e16,m4,tu,mu +; RV64-1024-NEXT: vmv4r.v v16, v28 +; RV64-1024-NEXT: vslideup.vi v16, v12, 0 +; RV64-1024-NEXT: vsetvli a2, a3, e16,m2,ta,mu +; RV64-1024-NEXT: vmv.v.i v12, 0 +; RV64-1024-NEXT: vsetvli a2, a1, e16,m4,tu,mu +; RV64-1024-NEXT: vslideup.vx v16, v12, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI0_0) +; RV64-1024-NEXT: vsetvli a4, a1, e16,m4,ta,mu +; RV64-1024-NEXT: vle16.v v20, (a2) +; RV64-1024-NEXT: vrgather.vv v24, v16, v20 +; RV64-1024-NEXT: vsetvli a2, a3, e16,m4,tu,mu +; RV64-1024-NEXT: vslideup.vi v28, v8, 0 +; RV64-1024-NEXT: vsetvli a2, a1, e16,m4,tu,mu +; RV64-1024-NEXT: vslideup.vx v28, v12, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI0_1) +; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI0_1) +; RV64-1024-NEXT: vsetvli a3, a1, e16,m4,ta,mu +; RV64-1024-NEXT: vle16.v v12, (a2) +; RV64-1024-NEXT: vrgather.vv v8, v24, v12 +; RV64-1024-NEXT: lui a2, 1026731 +; RV64-1024-NEXT: addiw a2, a2, -1365 +; RV64-1024-NEXT: slli a2, a2, 12 +; RV64-1024-NEXT: addi a2, a2, -1365 +; RV64-1024-NEXT: slli a2, a2, 12 +; RV64-1024-NEXT: addi a2, a2, -1365 +; RV64-1024-NEXT: slli a2, a2, 12 +; RV64-1024-NEXT: addi a2, a2, -1366 +; RV64-1024-NEXT: vsetivli a3, 4, e64,m1,ta,mu +; RV64-1024-NEXT: vmv.s.x v25, a2 +; RV64-1024-NEXT: vsetivli a2, 2, e64,m1,tu,mu +; RV64-1024-NEXT: vmv1r.v v0, v25 +; RV64-1024-NEXT: vslideup.vi v0, v25, 1 +; RV64-1024-NEXT: vsetivli a2, 3, e64,m1,tu,mu +; RV64-1024-NEXT: vslideup.vi v0, v25, 2 +; RV64-1024-NEXT: vsetivli a2, 4, e64,m1,tu,mu +; RV64-1024-NEXT: vslideup.vi v0, v25, 3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI0_2) +; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI0_2) +; RV64-1024-NEXT: vsetvli a3, a1, e16,m4,ta,mu +; RV64-1024-NEXT: vle16.v v12, (a2) +; RV64-1024-NEXT: vsetvli a2, a1, e16,m4,tu,mu +; RV64-1024-NEXT: vrgather.vv v8, v28, v12, v0.t +; RV64-1024-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; RV64-1024-NEXT: vse16.v v8, (a0) +; RV64-1024-NEXT: ret +entry: + %ve = load <128 x i16>, <128 x i16>* %0, align 256 + %vo = load <128 x i16>, <128 x i16>* %1, align 256 + %2 = shufflevector <128 x i16> %ve, <128 x i16> poison, <256 x i32> + %3 = shufflevector <128 x i16> %vo, <128 x i16> poison, <256 x i32> + %4 = shufflevector <256 x i16> %2, <256 x i16> %3, <256 x i32> + store <256 x i16> %4, <256 x i16>* %agg.result, align 512 + ret void +}