Index: llvm/trunk/include/llvm/CodeGen/ValueTypes.td =================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.td +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.td @@ -126,43 +126,45 @@ def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value def v8f16 : ValueType<128, 96>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 97>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 98>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 99>; // 3 x f32 vector value -def v4f32 : ValueType<128, 100>; // 4 x f32 vector value -def v5f32 : ValueType<160, 101>; // 5 x f32 vector value -def v8f32 : ValueType<256, 102>; // 8 x f32 vector value -def v16f32 : ValueType<512, 103>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 104>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 105>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 106>; // 128 x f32 vector value -def v256f32 : ValueType<8182, 107>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 108>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 111>; // 1 x f64 vector value -def v2f64 : ValueType<128, 112>; // 2 x f64 vector value -def v4f64 : ValueType<256, 113>; // 4 x f64 vector value -def v8f64 : ValueType<512, 114>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 115>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 116>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 117>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 118>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 119>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 120>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 121>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 
vector value -def nxv1f64 : ValueType<64, 123>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 124>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 125>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 126>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 127>; // X86 MMX value -def FlagVT : ValueType<0 , 128>; // Pre-RA sched glue -def isVoid : ValueType<0 , 129>; // Produces no value -def untyped: ValueType<8 , 130>; // Produces an untyped value -def exnref: ValueType<0, 131>; // WebAssembly's exnref type +def v16f16 : ValueType<256, 97>; // 16 x f16 vector value +def v32f16 : ValueType<512, 98>; // 32 x f16 vector value +def v1f32 : ValueType<32 , 99>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 100>; // 2 x f32 vector value +def v3f32 : ValueType<96 , 101>; // 3 x f32 vector value +def v4f32 : ValueType<128, 102>; // 4 x f32 vector value +def v5f32 : ValueType<160, 103>; // 5 x f32 vector value +def v8f32 : ValueType<256, 104>; // 8 x f32 vector value +def v16f32 : ValueType<512, 105>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 106>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 107>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 108>; // 128 x f32 vector value +def v256f32 : ValueType<8182, 109>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 110>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value +def v1f64 : ValueType<64, 113>; // 1 x f64 vector value +def v2f64 : ValueType<128, 114>; // 2 x f64 vector value +def v4f64 : ValueType<256, 115>; // 4 x f64 vector value +def v8f64 : ValueType<512, 116>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 117>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 118>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 119>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 120>; // n x 1 x f32 
vector value +def nxv2f32 : ValueType<64 , 121>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 122>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 123>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 125>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 126>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 127>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 128>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 129>; // X86 MMX value +def FlagVT : ValueType<0 , 130>; // Pre-RA sched glue +def isVoid : ValueType<0 , 131>; // Produces no value +def untyped: ValueType<8 , 132>; // Produces an untyped value +def exnref: ValueType<0, 133>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata Index: llvm/trunk/include/llvm/Support/MachineValueType.h =================================================================== --- llvm/trunk/include/llvm/Support/MachineValueType.h +++ llvm/trunk/include/llvm/Support/MachineValueType.h @@ -158,37 +158,39 @@ v3f16 = 94, // 3 x f16 v4f16 = 95, // 4 x f16 v8f16 = 96, // 8 x f16 - v1f32 = 97, // 1 x f32 - v2f32 = 98, // 2 x f32 - v3f32 = 99, // 3 x f32 - v4f32 = 100, // 4 x f32 - v5f32 = 101, // 5 x f32 - v8f32 = 102, // 8 x f32 - v16f32 = 103, // 16 x f32 - v32f32 = 104, // 32 x f32 - v64f32 = 105, // 64 x f32 - v128f32 = 106, // 128 x f32 - v256f32 = 107, // 256 x f32 - v512f32 = 108, // 512 x f32 - v1024f32 = 109, // 1024 x f32 - v2048f32 = 110, // 2048 x f32 - v1f64 = 111, // 1 x f64 - v2f64 = 112, // 2 x f64 - v4f64 = 113, // 4 x f64 - v8f64 = 114, // 8 x f64 - - nxv2f16 = 115, // n x 2 x f16 - nxv4f16 = 116, // n x 4 x f16 - nxv8f16 = 117, // n x 8 x f16 - nxv1f32 = 118, // n x 1 x f32 - nxv2f32 = 119, // n x 2 x f32 - nxv4f32 = 120, // n x 4 x f32 - nxv8f32 = 121, // n x 8 x f32 - nxv16f32 = 122, // n x 16 x f32 - nxv1f64 = 
123, // n x 1 x f64 - nxv2f64 = 124, // n x 2 x f64 - nxv4f64 = 125, // n x 4 x f64 - nxv8f64 = 126, // n x 8 x f64 + v16f16 = 97, // 16 x f16 + v32f16 = 98, // 32 x f16 + v1f32 = 99, // 1 x f32 + v2f32 = 100, // 2 x f32 + v3f32 = 101, // 3 x f32 + v4f32 = 102, // 4 x f32 + v5f32 = 103, // 5 x f32 + v8f32 = 104, // 8 x f32 + v16f32 = 105, // 16 x f32 + v32f32 = 106, // 32 x f32 + v64f32 = 107, // 64 x f32 + v128f32 = 108, // 128 x f32 + v256f32 = 109, // 256 x f32 + v512f32 = 110, // 512 x f32 + v1024f32 = 111, // 1024 x f32 + v2048f32 = 112, // 2048 x f32 + v1f64 = 113, // 1 x f64 + v2f64 = 114, // 2 x f64 + v4f64 = 115, // 4 x f64 + v8f64 = 116, // 8 x f64 + + nxv2f16 = 117, // n x 2 x f16 + nxv4f16 = 118, // n x 4 x f16 + nxv8f16 = 119, // n x 8 x f16 + nxv1f32 = 120, // n x 1 x f32 + nxv2f32 = 121, // n x 2 x f32 + nxv4f32 = 122, // n x 4 x f32 + nxv8f32 = 123, // n x 8 x f32 + nxv16f32 = 124, // n x 16 x f32 + nxv1f64 = 125, // n x 1 x f64 + nxv2f64 = 126, // n x 2 x f64 + nxv4f64 = 127, // n x 4 x f64 + nxv8f64 = 128, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = nxv8f64, @@ -199,20 +201,20 @@ FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 127, // This is an X86 MMX value + x86mmx = 129, // This is an X86 MMX value - Glue = 128, // This glues nodes together during pre-RA sched + Glue = 130, // This glues nodes together during pre-RA sched - isVoid = 129, // This has no value + isVoid = 131, // This has no value - Untyped = 130, // This value takes a register, but has + Untyped = 132, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - exnref = 131, // WebAssembly's exnref type + exnref = 133, // WebAssembly's exnref type FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 132, // This always remains at the end of the list. + LAST_VALUETYPE = 134, // This always remains at the end of the list. 
// This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -341,17 +343,18 @@ /// Return true if this is a 256-bit vector type. bool is256BitVector() const { - return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64); + return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 || + SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 || + SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 || + SimpleTy == MVT::v4i64); } /// Return true if this is a 512-bit vector type. bool is512BitVector() const { - return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || - SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 || - SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || - SimpleTy == MVT::v8i64); + return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 || + SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 || + SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 || + SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64); } /// Return true if this is a 1024-bit vector type. 
@@ -483,6 +486,8 @@ case v3f16: case v4f16: case v8f16: + case v16f16: + case v32f16: case nxv2f16: case nxv4f16: case nxv8f16: return f16; @@ -546,6 +551,7 @@ case v32i16: case v32i32: case v32i64: + case v32f16: case v32f32: case nxv32i1: case nxv32i8: @@ -557,6 +563,7 @@ case v16i16: case v16i32: case v16i64: + case v16f16: case v16f32: case nxv16i1: case nxv16i8: @@ -741,6 +748,7 @@ case v16i16: case v8i32: case v4i64: + case v16f16: case v8f32: case v4f64: case nxv32i8: @@ -754,6 +762,7 @@ case v32i16: case v16i32: case v8i64: + case v32f16: case v16f32: case v8f64: case nxv32i16: @@ -931,6 +940,8 @@ if (NumElements == 3) return MVT::v3f16; if (NumElements == 4) return MVT::v4f16; if (NumElements == 8) return MVT::v8f16; + if (NumElements == 16) return MVT::v16f16; + if (NumElements == 32) return MVT::v32f16; break; case MVT::f32: if (NumElements == 1) return MVT::v1f32; Index: llvm/trunk/lib/CodeGen/ValueTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/ValueTypes.cpp +++ llvm/trunk/lib/CodeGen/ValueTypes.cpp @@ -330,6 +330,8 @@ case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16); + case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32); case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3); Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -157,6 +157,8 @@ setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); 
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); @@ -219,6 +221,8 @@ setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); + setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand); + setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand);