Index: include/llvm/CodeGen/ValueTypes.td =================================================================== --- include/llvm/CodeGen/ValueTypes.td +++ include/llvm/CodeGen/ValueTypes.td @@ -55,112 +55,114 @@ def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value -def v8i16 : ValueType<128, 36>; // 8 x i16 vector value -def v16i16 : ValueType<256, 37>; // 16 x i16 vector value -def v32i16 : ValueType<512, 38>; // 32 x i16 vector value -def v64i16 : ValueType<1024,39>; // 64 x i16 vector value -def v128i16: ValueType<2048,40>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value -def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value -def v4i32 : ValueType<128, 44>; // 4 x i32 vector value -def v5i32 : ValueType<160, 45>; // 5 x i32 vector value -def v8i32 : ValueType<256, 46>; // 8 x i32 vector value -def v16i32 : ValueType<512, 47>; // 16 x i32 vector value -def v32i32 : ValueType<1024,48>; // 32 x i32 vector value -def v64i32 : ValueType<2048,49>; // 64 x i32 vector value -def v128i32 : ValueType<4096,50>; // 128 x i32 vector value -def v256i32 : ValueType<8182,51>; // 256 x i32 vector value -def v512i32 : ValueType<16384,52>; // 512 x i32 vector value -def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value - -def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value -def v2i64 : ValueType<128, 56>; // 2 x i64 vector value -def v4i64 : ValueType<256, 57>; // 4 x i64 vector value -def v8i64 : ValueType<512, 58>; // 8 x i64 vector value -def v16i64 : ValueType<1024,59>; // 16 x i64 vector value -def v32i64 : ValueType<2048,60>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 61>; // 1 x i128 vector value - -def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value -def nxv2i1 : 
ValueType<2, 63>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 65>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value -def v8f16 : ValueType<128, 94>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 95>; 
// 1 x f32 vector value -def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value -def v4f32 : ValueType<128, 98>; // 4 x f32 vector value -def v5f32 : ValueType<160, 99>; // 5 x f32 vector value -def v8f32 : ValueType<256, 100>; // 8 x f32 vector value -def v16f32 : ValueType<512, 101>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value -def v256f32 : ValueType<8182, 105>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 109>; // 1 x f64 vector value -def v2f64 : ValueType<128, 110>; // 2 x f64 vector value -def v4f64 : ValueType<256, 111>; // 4 x f64 vector value -def v8f64 : ValueType<512, 112>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 125>; // X86 MMX value -def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue -def isVoid : ValueType<0 , 127>; // Produces no value -def untyped: 
ValueType<8 , 128>; // Produces an untyped value -def exnref: ValueType<0, 129>; // WebAssembly's exnref type +def v3i16 : ValueType<48 , 35>; // 3 x i16 vector value +def v4i16 : ValueType<64 , 36>; // 4 x i16 vector value +def v8i16 : ValueType<128, 37>; // 8 x i16 vector value +def v16i16 : ValueType<256, 38>; // 16 x i16 vector value +def v32i16 : ValueType<512, 39>; // 32 x i16 vector value +def v64i16 : ValueType<1024,40>; // 64 x i16 vector value +def v128i16: ValueType<2048,41>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 42>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 43>; // 2 x i32 vector value +def v3i32 : ValueType<96 , 44>; // 3 x i32 vector value +def v4i32 : ValueType<128, 45>; // 4 x i32 vector value +def v5i32 : ValueType<160, 46>; // 5 x i32 vector value +def v8i32 : ValueType<256, 47>; // 8 x i32 vector value +def v16i32 : ValueType<512, 48>; // 16 x i32 vector value +def v32i32 : ValueType<1024,49>; // 32 x i32 vector value +def v64i32 : ValueType<2048,50>; // 64 x i32 vector value +def v128i32 : ValueType<4096,51>; // 128 x i32 vector value +def v256i32 : ValueType<8192,52>; // 256 x i32 vector value +def v512i32 : ValueType<16384,53>; // 512 x i32 vector value +def v1024i32 : ValueType<32768,54>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536,55>; // 2048 x i32 vector value + +def v1i64 : ValueType<64 , 56>; // 1 x i64 vector value +def v2i64 : ValueType<128, 57>; // 2 x i64 vector value +def v4i64 : ValueType<256, 58>; // 4 x i64 vector value +def v8i64 : ValueType<512, 59>; // 8 x i64 vector value +def v16i64 : ValueType<1024,60>; // 16 x i64 vector value +def v32i64 : ValueType<2048,61>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 62>; // 1 x i128 vector value + +def nxv1i1 : ValueType<1, 63>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 64>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 65>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 66>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 67>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 68>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 69>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 70>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 71>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 72>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 73>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 74>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 75>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 76>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 77>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 78>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 79>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 80>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 81>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 82>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 83>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 84>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 85>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,86>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 87>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 88>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 89>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 90>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,91>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,92>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 93>; // 2 x f16 vector value +def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value +def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value +def v8f16 : ValueType<128, 96>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 97>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 98>; // 2 x f32 vector value +def v3f32 : 
ValueType<96 , 99>; // 3 x f32 vector value +def v4f32 : ValueType<128, 100>; // 4 x f32 vector value +def v5f32 : ValueType<160, 101>; // 5 x f32 vector value +def v8f32 : ValueType<256, 102>; // 8 x f32 vector value +def v16f32 : ValueType<512, 103>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 104>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 105>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 106>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 107>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 108>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value +def v1f64 : ValueType<64, 111>; // 1 x f64 vector value +def v2f64 : ValueType<128, 112>; // 2 x f64 vector value +def v4f64 : ValueType<256, 113>; // 4 x f64 vector value +def v8f64 : ValueType<512, 114>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 115>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 116>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 117>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 118>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 119>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 120>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 121>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 123>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 124>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 125>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 126>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 127>; // X86 MMX value +def FlagVT : ValueType<0 , 128>; // Pre-RA sched glue +def isVoid : ValueType<0 , 129>; // Produces no value +def untyped: ValueType<8 , 130>; // Produces an untyped value +def exnref: ValueType<0, 131>; // WebAssembly's 
exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata Index: include/llvm/Support/MachineValueType.h =================================================================== --- include/llvm/Support/MachineValueType.h +++ include/llvm/Support/MachineValueType.h @@ -79,72 +79,73 @@ v1i16 = 33, // 1 x i16 v2i16 = 34, // 2 x i16 - v4i16 = 35, // 4 x i16 - v8i16 = 36, // 8 x i16 - v16i16 = 37, // 16 x i16 - v32i16 = 38, // 32 x i16 - v64i16 = 39, // 64 x i16 - v128i16 = 40, //128 x i16 - - v1i32 = 41, // 1 x i32 - v2i32 = 42, // 2 x i32 - v3i32 = 43, // 3 x i32 - v4i32 = 44, // 4 x i32 - v5i32 = 45, // 5 x i32 - v8i32 = 46, // 8 x i32 - v16i32 = 47, // 16 x i32 - v32i32 = 48, // 32 x i32 - v64i32 = 49, // 64 x i32 - v128i32 = 50, // 128 x i32 - v256i32 = 51, // 256 x i32 - v512i32 = 52, // 512 x i32 - v1024i32 = 53, // 1024 x i32 - v2048i32 = 54, // 2048 x i32 - - v1i64 = 55, // 1 x i64 - v2i64 = 56, // 2 x i64 - v4i64 = 57, // 4 x i64 - v8i64 = 58, // 8 x i64 - v16i64 = 59, // 16 x i64 - v32i64 = 60, // 32 x i64 - - v1i128 = 61, // 1 x i128 + v3i16 = 35, // 3 x i16 + v4i16 = 36, // 4 x i16 + v8i16 = 37, // 8 x i16 + v16i16 = 38, // 16 x i16 + v32i16 = 39, // 32 x i16 + v64i16 = 40, // 64 x i16 + v128i16 = 41, //128 x i16 + + v1i32 = 42, // 1 x i32 + v2i32 = 43, // 2 x i32 + v3i32 = 44, // 3 x i32 + v4i32 = 45, // 4 x i32 + v5i32 = 46, // 5 x i32 + v8i32 = 47, // 8 x i32 + v16i32 = 48, // 16 x i32 + v32i32 = 49, // 32 x i32 + v64i32 = 50, // 64 x i32 + v128i32 = 51, // 128 x i32 + v256i32 = 52, // 256 x i32 + v512i32 = 53, // 512 x i32 + v1024i32 = 54, // 1024 x i32 + v2048i32 = 55, // 2048 x i32 + + v1i64 = 56, // 1 x i64 + v2i64 = 57, // 2 x i64 + v4i64 = 58, // 4 x i64 + v8i64 = 59, // 8 x i64 + v16i64 = 60, // 16 x i64 + v32i64 = 61, // 32 x i64 + + v1i128 = 62, // 1 x i128 // Scalable integer types - nxv1i1 = 62, // n x 1 x i1 - nxv2i1 = 63, // n x 2 x i1 - nxv4i1 = 64, // n x 4 x i1 - nxv8i1 = 65, // n x 8 x i1 - nxv16i1 
= 66, // n x 16 x i1 - nxv32i1 = 67, // n x 32 x i1 - - nxv1i8 = 68, // n x 1 x i8 - nxv2i8 = 69, // n x 2 x i8 - nxv4i8 = 70, // n x 4 x i8 - nxv8i8 = 71, // n x 8 x i8 - nxv16i8 = 72, // n x 16 x i8 - nxv32i8 = 73, // n x 32 x i8 - - nxv1i16 = 74, // n x 1 x i16 - nxv2i16 = 75, // n x 2 x i16 - nxv4i16 = 76, // n x 4 x i16 - nxv8i16 = 77, // n x 8 x i16 - nxv16i16 = 78, // n x 16 x i16 - nxv32i16 = 79, // n x 32 x i16 - - nxv1i32 = 80, // n x 1 x i32 - nxv2i32 = 81, // n x 2 x i32 - nxv4i32 = 82, // n x 4 x i32 - nxv8i32 = 83, // n x 8 x i32 - nxv16i32 = 84, // n x 16 x i32 - nxv32i32 = 85, // n x 32 x i32 - - nxv1i64 = 86, // n x 1 x i64 - nxv2i64 = 87, // n x 2 x i64 - nxv4i64 = 88, // n x 4 x i64 - nxv8i64 = 89, // n x 8 x i64 - nxv16i64 = 90, // n x 16 x i64 - nxv32i64 = 91, // n x 32 x i64 + nxv1i1 = 63, // n x 1 x i1 + nxv2i1 = 64, // n x 2 x i1 + nxv4i1 = 65, // n x 4 x i1 + nxv8i1 = 66, // n x 8 x i1 + nxv16i1 = 67, // n x 16 x i1 + nxv32i1 = 68, // n x 32 x i1 + + nxv1i8 = 69, // n x 1 x i8 + nxv2i8 = 70, // n x 2 x i8 + nxv4i8 = 71, // n x 4 x i8 + nxv8i8 = 72, // n x 8 x i8 + nxv16i8 = 73, // n x 16 x i8 + nxv32i8 = 74, // n x 32 x i8 + + nxv1i16 = 75, // n x 1 x i16 + nxv2i16 = 76, // n x 2 x i16 + nxv4i16 = 77, // n x 4 x i16 + nxv8i16 = 78, // n x 8 x i16 + nxv16i16 = 79, // n x 16 x i16 + nxv32i16 = 80, // n x 32 x i16 + + nxv1i32 = 81, // n x 1 x i32 + nxv2i32 = 82, // n x 2 x i32 + nxv4i32 = 83, // n x 4 x i32 + nxv8i32 = 84, // n x 8 x i32 + nxv16i32 = 85, // n x 16 x i32 + nxv32i32 = 86, // n x 32 x i32 + + nxv1i64 = 87, // n x 1 x i64 + nxv2i64 = 88, // n x 2 x i64 + nxv4i64 = 89, // n x 4 x i64 + nxv8i64 = 90, // n x 8 x i64 + nxv16i64 = 91, // n x 16 x i64 + nxv32i64 = 92, // n x 32 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, @@ -152,40 +153,41 @@ FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - v2f16 = 92, // 2 x f16 - v4f16 = 93, // 4 x f16 - v8f16 = 94, // 
8 x f16 - v1f32 = 95, // 1 x f32 - v2f32 = 96, // 2 x f32 - v3f32 = 97, // 3 x f32 - v4f32 = 98, // 4 x f32 - v5f32 = 99, // 5 x f32 - v8f32 = 100, // 8 x f32 - v16f32 = 101, // 16 x f32 - v32f32 = 102, // 32 x f32 - v64f32 = 103, // 64 x f32 - v128f32 = 104, // 128 x f32 - v256f32 = 105, // 256 x f32 - v512f32 = 106, // 512 x f32 - v1024f32 = 107, // 1024 x f32 - v2048f32 = 108, // 2048 x f32 - v1f64 = 109, // 1 x f64 - v2f64 = 110, // 2 x f64 - v4f64 = 111, // 4 x f64 - v8f64 = 112, // 8 x f64 - - nxv2f16 = 113, // n x 2 x f16 - nxv4f16 = 114, // n x 4 x f16 - nxv8f16 = 115, // n x 8 x f16 - nxv1f32 = 116, // n x 1 x f32 - nxv2f32 = 117, // n x 2 x f32 - nxv4f32 = 118, // n x 4 x f32 - nxv8f32 = 119, // n x 8 x f32 - nxv16f32 = 120, // n x 16 x f32 - nxv1f64 = 121, // n x 1 x f64 - nxv2f64 = 122, // n x 2 x f64 - nxv4f64 = 123, // n x 4 x f64 - nxv8f64 = 124, // n x 8 x f64 + v2f16 = 93, // 2 x f16 + v3f16 = 94, // 3 x f16 + v4f16 = 95, // 4 x f16 + v8f16 = 96, // 8 x f16 + v1f32 = 97, // 1 x f32 + v2f32 = 98, // 2 x f32 + v3f32 = 99, // 3 x f32 + v4f32 = 100, // 4 x f32 + v5f32 = 101, // 5 x f32 + v8f32 = 102, // 8 x f32 + v16f32 = 103, // 16 x f32 + v32f32 = 104, // 32 x f32 + v64f32 = 105, // 64 x f32 + v128f32 = 106, // 128 x f32 + v256f32 = 107, // 256 x f32 + v512f32 = 108, // 512 x f32 + v1024f32 = 109, // 1024 x f32 + v2048f32 = 110, // 2048 x f32 + v1f64 = 111, // 1 x f64 + v2f64 = 112, // 2 x f64 + v4f64 = 113, // 4 x f64 + v8f64 = 114, // 8 x f64 + + nxv2f16 = 115, // n x 2 x f16 + nxv4f16 = 116, // n x 4 x f16 + nxv8f16 = 117, // n x 8 x f16 + nxv1f32 = 118, // n x 1 x f32 + nxv2f32 = 119, // n x 2 x f32 + nxv4f32 = 120, // n x 4 x f32 + nxv8f32 = 121, // n x 8 x f32 + nxv16f32 = 122, // n x 16 x f32 + nxv1f64 = 123, // n x 1 x f64 + nxv2f64 = 124, // n x 2 x f64 + nxv4f64 = 125, // n x 4 x f64 + nxv8f64 = 126, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = nxv8f64, @@ -196,20 +198,20 @@ FIRST_VECTOR_VALUETYPE = v1i1, 
LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 125, // This is an X86 MMX value + x86mmx = 127, // This is an X86 MMX value - Glue = 126, // This glues nodes together during pre-RA sched + Glue = 128, // This glues nodes together during pre-RA sched - isVoid = 127, // This has no value + isVoid = 129, // This has no value - Untyped = 128, // This value takes a register, but has + Untyped = 130, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - exnref = 129, // WebAssembly's exnref type + exnref = 131, // WebAssembly's exnref type FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 130, // This always remains at the end of the list. + LAST_VALUETYPE = 132, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -465,6 +467,7 @@ case nxv32i8: return i8; case v1i16: case v2i16: + case v3i16: case v4i16: case v8i16: case v16i16: @@ -511,6 +514,7 @@ case nxv32i64: return i64; case v1i128: return i128; case v2f16: + case v3f16: case v4f16: case v8f16: case nxv2f16: @@ -629,7 +633,9 @@ case nxv4f32: case nxv4f64: return 4; case v3i32: - case v3f32: return 3; + case v3f32: + case v3i16: + case v3f16: return 3; case v2i1: case v2i8: case v2i16: @@ -721,6 +727,8 @@ case nxv1i32: case nxv2f16: case nxv1f32: return 32; + case v3i16: + case v3f16: return 48; case x86mmx: case f64 : case i64 : @@ -917,6 +925,7 @@ case MVT::i16: if (NumElements == 1) return MVT::v1i16; if (NumElements == 2) return MVT::v2i16; + if (NumElements == 3) return MVT::v3i16; if (NumElements == 4) return MVT::v4i16; if (NumElements == 8) return MVT::v8i16; if (NumElements == 16) return MVT::v16i16; @@ -953,6 +962,7 @@ break; case MVT::f16: if (NumElements == 2) return MVT::v2f16; + if (NumElements == 3) return MVT::v3f16; if (NumElements == 4) return MVT::v4f16; if (NumElements == 8) return 
MVT::v8f16; break; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -824,7 +824,8 @@ LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); assert((LA == TypeLegal || LA == TypeSoftenFloat || - ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + (NVT.isVector() || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && "Promote may not follow Expand or Promote"); if (LA == TypeSplitVector) Index: lib/CodeGen/ValueTypes.cpp =================================================================== --- lib/CodeGen/ValueTypes.cpp +++ lib/CodeGen/ValueTypes.cpp @@ -157,6 +157,7 @@ case MVT::v256i8: return "v256i8"; case MVT::v1i16: return "v1i16"; case MVT::v2i16: return "v2i16"; + case MVT::v3i16: return "v3i16"; case MVT::v4i16: return "v4i16"; case MVT::v8i16: return "v8i16"; case MVT::v16i16: return "v16i16"; @@ -187,6 +188,7 @@ case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; + case MVT::v3f16: return "v3f16"; case MVT::v4f16: return "v4f16"; case MVT::v8f16: return "v8f16"; case MVT::v3f32: return "v3f32"; @@ -254,6 +256,7 @@ case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); + case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3); case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); @@ -282,6 +285,7 @@ case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); + 
case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -144,6 +144,9 @@ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); @@ -151,6 +154,7 @@ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); @@ -212,6 +216,7 @@ setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -178,6 +178,7 @@ setOperationAction(ISD::STORE, MVT::v32i32, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); + setTruncStoreAction(MVT::v3i32, 
MVT::v3i16, Expand); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -215,6 +216,7 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); Index: test/CodeGen/X86/promote-vec3.ll =================================================================== --- test/CodeGen/X86/promote-vec3.ll +++ test/CodeGen/X86/promote-vec3.ll @@ -9,55 +9,35 @@ ; SSE3-LABEL: zext_i8: ; SSE3: # %bb.0: ; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; SSE3-NEXT: movd %eax, %xmm0 -; SSE3-NEXT: pextrw $0, %xmm0, %eax -; SSE3-NEXT: # kill: def $ax killed $ax killed $eax -; SSE3-NEXT: # kill: def $dx killed $dx killed $edx -; SSE3-NEXT: # kill: def $cx killed $cx killed $ecx +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $1, %eax, %xmm0 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $2, %eax, %xmm0 ; SSE3-NEXT: retl ; ; SSE41-LABEL: zext_i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 ; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 -; SSE41-NEXT: pextrw $2, %xmm0, %edx -; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: pextrw $4, %xmm0, %ecx -; SSE41-NEXT: # kill: def $ax killed $ax killed $eax -; SSE41-NEXT: # kill: def $dx killed $dx killed $edx -; SSE41-NEXT: # kill: def $cx killed $cx killed $ecx ; SSE41-NEXT: retl ; ; AVX-32-LABEL: zext_i8: ; AVX-32: # %bb.0: ; AVX-32-NEXT: vpxor %xmm0, 
%xmm0, %xmm0 ; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm1 -; AVX-32-NEXT: vpextrw $2, %xmm0, %edx -; AVX-32-NEXT: vmovd %xmm1, %eax -; AVX-32-NEXT: vpextrw $4, %xmm1, %ecx -; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax -; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx -; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx ; AVX-32-NEXT: retl ; ; AVX-64-LABEL: zext_i8: ; AVX-64: # %bb.0: -; AVX-64-NEXT: vmovd %edi, %xmm0 -; AVX-64-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 -; AVX-64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 -; AVX-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX-64-NEXT: vmovd %xmm0, %eax -; AVX-64-NEXT: vpextrw $2, %xmm0, %edx -; AVX-64-NEXT: vpextrw $4, %xmm0, %ecx -; AVX-64-NEXT: # kill: def $ax killed $ax killed $eax -; AVX-64-NEXT: # kill: def $dx killed $dx killed $edx -; AVX-64-NEXT: # kill: def $cx killed $cx killed $ecx +; AVX-64-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0 ; AVX-64-NEXT: retq %2 = zext <3 x i8> %0 to <3 x i16> ret <3 x i16> %2 @@ -66,64 +46,42 @@ define <3 x i16> @sext_i8(<3 x i8>) { ; SSE3-LABEL: sext_i8: ; SSE3: # %bb.0: -; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movd %eax, %xmm0 +; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; SSE3-NEXT: pinsrw $1, %eax, %xmm0 -; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; SSE3-NEXT: pinsrw $2, %eax, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: psraw $8, %xmm0 -; SSE3-NEXT: pextrw $0, %xmm0, %eax -; SSE3-NEXT: pextrw $1, %xmm0, %edx -; SSE3-NEXT: pextrw $2, %xmm0, %ecx -; SSE3-NEXT: # kill: def $ax killed $ax killed 
$eax -; SSE3-NEXT: # kill: def $dx killed $dx killed $edx -; SSE3-NEXT: # kill: def $cx killed $cx killed $ecx ; SSE3-NEXT: retl ; ; SSE41-LABEL: sext_i8: ; SSE41: # %bb.0: -; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 -; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 -; SSE41-NEXT: pslld $24, %xmm0 -; SSE41-NEXT: psrad $24, %xmm0 -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: pextrw $2, %xmm0, %edx -; SSE41-NEXT: pextrw $4, %xmm0, %ecx -; SSE41-NEXT: # kill: def $ax killed $ax killed $eax -; SSE41-NEXT: # kill: def $dx killed $dx killed $edx -; SSE41-NEXT: # kill: def $cx killed $cx killed $ecx +; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE41-NEXT: pinsrw $1, %eax, %xmm0 +; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE41-NEXT: pinsrw $2, %eax, %xmm0 ; SSE41-NEXT: retl ; ; AVX-32-LABEL: sext_i8: ; AVX-32: # %bb.0: -; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; AVX-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX-32-NEXT: vpsrad $24, %xmm0, %xmm0 -; AVX-32-NEXT: vmovd %xmm0, %eax -; AVX-32-NEXT: vpextrw $2, %xmm0, %edx -; AVX-32-NEXT: vpextrw $4, %xmm0, %ecx -; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax -; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx -; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx +; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; AVX-32-NEXT: vmovd %eax, %xmm0 +; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; AVX-32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; AVX-32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ; AVX-32-NEXT: retl ; ; AVX-64-LABEL: sext_i8: ; AVX-64: # %bb.0: -; AVX-64-NEXT: vmovd %edi, %xmm0 -; AVX-64-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 -; AVX-64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 -; AVX-64-NEXT: vpslld $24, 
%xmm0, %xmm0 -; AVX-64-NEXT: vpsrad $24, %xmm0, %xmm0 -; AVX-64-NEXT: vmovd %xmm0, %eax -; AVX-64-NEXT: vpextrw $2, %xmm0, %edx -; AVX-64-NEXT: vpextrw $4, %xmm0, %ecx -; AVX-64-NEXT: # kill: def $ax killed $ax killed $eax -; AVX-64-NEXT: # kill: def $dx killed $dx killed $edx -; AVX-64-NEXT: # kill: def $cx killed $cx killed $ecx +; AVX-64-NEXT: movsbl %sil, %eax +; AVX-64-NEXT: movsbl %dil, %ecx +; AVX-64-NEXT: vmovd %ecx, %xmm0 +; AVX-64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; AVX-64-NEXT: movsbl %dl, %eax +; AVX-64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ; AVX-64-NEXT: retq %2 = sext <3 x i8> %0 to <3 x i16> ret <3 x i16> %2 Index: test/CodeGen/X86/vec_cast.ll =================================================================== --- test/CodeGen/X86/vec_cast.ll +++ test/CodeGen/X86/vec_cast.ll @@ -27,22 +27,13 @@ define <3 x i32> @b(<3 x i16> %a) nounwind { ; CHECK-LIN-LABEL: b: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: pxor %xmm0, %xmm0 -; CHECK-LIN-NEXT: pinsrw $1, %edi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $3, %esi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $5, %edx, %xmm0 +; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; CHECK-LIN-NEXT: psrad $16, %xmm0 ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: b: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d -; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx -; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx -; CHECK-WIN-NEXT: pxor %xmm0, %xmm0 -; CHECK-WIN-NEXT: pinsrw $1, %ecx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $3, %edx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $5, %r8d, %xmm0 +; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-WIN-NEXT: psrad $16, %xmm0 ; CHECK-WIN-NEXT: retq %c = sext <3 x i16> %a to <3 x i32> @@ -87,21 +78,15 @@ define <3 x i32> @e(<3 x i16> %a) nounwind { ; CHECK-LIN-LABEL: e: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: pxor %xmm0, %xmm0 -; CHECK-LIN-NEXT: pinsrw $0, %edi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $2, %esi, 
%xmm0 -; CHECK-LIN-NEXT: pinsrw $4, %edx, %xmm0 +; CHECK-LIN-NEXT: pxor %xmm1, %xmm1 +; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: e: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d -; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx -; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx -; CHECK-WIN-NEXT: pxor %xmm0, %xmm0 -; CHECK-WIN-NEXT: pinsrw $0, %ecx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $2, %edx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $4, %r8d, %xmm0 +; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0 +; CHECK-WIN-NEXT: pxor %xmm1, %xmm1 +; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-WIN-NEXT: retq %c = zext <3 x i16> %a to <3 x i32> ret <3 x i32> %c @@ -146,23 +131,14 @@ define <3 x i16> @h(<3 x i32> %a) nounwind { ; CHECK-LIN-LABEL: h: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: movd %xmm0, %eax -; CHECK-LIN-NEXT: pextrw $2, %xmm0, %edx -; CHECK-LIN-NEXT: pextrw $4, %xmm0, %ecx -; CHECK-LIN-NEXT: # kill: def $ax killed $ax killed $eax -; CHECK-LIN-NEXT: # kill: def $dx killed $dx killed $edx -; CHECK-LIN-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-LIN-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: h: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0 -; CHECK-WIN-NEXT: movd %xmm0, %eax -; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx -; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx -; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax -; CHECK-WIN-NEXT: # kill: def $dx killed $dx killed $edx -; CHECK-WIN-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-WIN-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7] +; CHECK-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-WIN-NEXT: retq %c = trunc <3 x i32> %a to <3 x i16> ret <3 x i16> %c Index: 
test/CodeGen/X86/widen_load-2.ll =================================================================== --- test/CodeGen/X86/widen_load-2.ll +++ test/CodeGen/X86/widen_load-2.ll @@ -143,32 +143,22 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind { ; X86-LABEL: add3i16: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 16(%ebp), %ecx -; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X86-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X86-NEXT: paddd %xmm0, %xmm1 -; X86-NEXT: pextrw $4, %xmm1, 4(%eax) -; X86-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; X86-NEXT: movd %xmm1, (%eax) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movdqa (%edx), %xmm0 +; X86-NEXT: paddw (%ecx), %xmm0 +; X86-NEXT: pextrw $2, %xmm0, 4(%eax) +; X86-NEXT: movd %xmm0, (%eax) ; X86-NEXT: retl $4 ; ; X64-LABEL: add3i16: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X64-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrw $4, %xmm1, 4(%rdi) -; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; X64-NEXT: movd %xmm1, (%rdi) +; X64-NEXT: movdqa (%rsi), %xmm0 +; X64-NEXT: paddw (%rdx), %xmm0 +; X64-NEXT: pextrw $2, %xmm0, 4(%rdi) +; X64-NEXT: movd %xmm0, (%rdi) ; X64-NEXT: retq %a = load %i16vec3, %i16vec3* %ap, align 16 %b = load %i16vec3, %i16vec3* %bp, align 16 Index: test/TableGen/intrinsic-varargs.td 
=================================================================== --- test/TableGen/intrinsic-varargs.td +++ test/TableGen/intrinsic-varargs.td @@ -25,7 +25,7 @@ } // isVoid needs to match the definition in ValueTypes.td -def isVoid : ValueType<0, 127>; // Produces no value +def isVoid : ValueType<0, 129>; // Produces no value def llvm_vararg_ty : LLVMType; // this means vararg here // CHECK: /* 0 */ 0, 29, 0, Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -98,6 +98,7 @@ case MVT::v256i8: return "MVT::v256i8"; case MVT::v1i16: return "MVT::v1i16"; case MVT::v2i16: return "MVT::v2i16"; + case MVT::v3i16: return "MVT::v3i16"; case MVT::v4i16: return "MVT::v4i16"; case MVT::v8i16: return "MVT::v8i16"; case MVT::v16i16: return "MVT::v16i16"; @@ -126,6 +127,7 @@ case MVT::v32i64: return "MVT::v32i64"; case MVT::v1i128: return "MVT::v1i128"; case MVT::v2f16: return "MVT::v2f16"; + case MVT::v3f16: return "MVT::v3f16"; case MVT::v4f16: return "MVT::v4f16"; case MVT::v8f16: return "MVT::v8f16"; case MVT::v1f32: return "MVT::v1f32"; Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp +++ utils/TableGen/IntrinsicEmitter.cpp @@ -220,7 +220,8 @@ IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_V48 = 43 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -347,6 +348,7 @@ case 8: Sig.push_back(IIT_V8); break; case 16: Sig.push_back(IIT_V16); break; case 32: Sig.push_back(IIT_V32); break; + case 48: Sig.push_back(IIT_V48); break; case 64: Sig.push_back(IIT_V64); break; case 512: Sig.push_back(IIT_V512); break; case 1024: Sig.push_back(IIT_V1024); break;