diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -112,60 +112,61 @@ def v2f64 : ValueType<128, 85>; // 2 x f64 vector value def v4f64 : ValueType<256, 86>; // 4 x f64 vector value def v8f64 : ValueType<512, 87>; // 8 x f64 vector value - -def nxv1i1 : ValueType<1, 88>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 89>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 90>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 91>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 92>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 93>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 94>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 95>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 96>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 97>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 98>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 99>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 100>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 101>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 102>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 103>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 104>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 105>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 106>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 107>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 108>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 109>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 110>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,111>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 112>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 113>; // n x 2 x i64 vector 
value -def nxv4i64 : ValueType<256, 114>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 115>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,116>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,117>; // n x 32 x i64 vector value - -def nxv2f16 : ValueType<32 , 118>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 119>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 120>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 121>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 122>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 123>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 124>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 125>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 126>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 127>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 128>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 129>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 130>; // X86 MMX value -def FlagVT : ValueType<0 , 131>; // Pre-RA sched glue -def isVoid : ValueType<0 , 132>; // Produces no value -def untyped: ValueType<8 , 133>; // Produces an untyped value -def exnref: ValueType<0, 134>; // WebAssembly's exnref type +def v16f64 : ValueType<1024, 88>; // 16 x f64 vector value + +def nxv1i1 : ValueType<1, 89>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 90>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 91>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 92>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 93>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 94>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 95>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 96>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 97>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 98>; // n x 8 x i8 vector value +def nxv16i8 
: ValueType<128, 99>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 100>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 101>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 102>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 103>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 104>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 105>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 106>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 107>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 108>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 109>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 110>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 111>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,112>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 113>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 114>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 115>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 116>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,117>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,118>; // n x 32 x i64 vector value + +def nxv2f16 : ValueType<32 , 119>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 120>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 121>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 122>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 123>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 124>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 125>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 126>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 127>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 128>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 129>; // n x 4 x f64 vector value +def nxv8f64 : 
ValueType<512, 130>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 131>; // X86 MMX value +def FlagVT : ValueType<0 , 132>; // Pre-RA sched glue +def isVoid : ValueType<0 , 133>; // Produces no value +def untyped: ValueType<8 , 134>; // Produces an untyped value +def exnref : ValueType<0 , 135>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -289,6 +289,7 @@ def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double +def llvm_v16f64_ty : LLVMType<v16f64>; // 16 x double def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -140,63 +140,64 @@ v2f64 = 85, // 2 x f64 v4f64 = 86, // 4 x f64 v8f64 = 87, // 8 x f64 + v16f64 = 88, // 16 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16, - LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v8f64, + LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v16f64, FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, - LAST_FIXEDLEN_VECTOR_VALUETYPE = v8f64, - - nxv1i1 = 88, // n x 1 x i1 - nxv2i1 = 89, // n x 2 x i1 - nxv4i1 = 90, // n x 4 x i1 - nxv8i1 = 91, // n x 8 x i1 - nxv16i1 = 92, // n x 16 x i1 - nxv32i1 = 93, // n x 32 x i1 - - nxv1i8 = 94, // n x 1 x i8 - nxv2i8 = 95, // n x 2 x i8 - nxv4i8 = 96, // n x 4 x i8 - nxv8i8 = 97, // n x 8 x i8 - nxv16i8 = 98, // n x 16 x i8 - nxv32i8 = 99, // n x 32 x i8 - - nxv1i16 = 100, // n x 1 x i16 - nxv2i16 = 101, // n x 2 x i16 - nxv4i16 = 102, // n x 4 x i16 - nxv8i16 = 103, // n x 8 x i16 - nxv16i16 = 104, // n x 16 x i16 - nxv32i16 = 105, // n x 32 x i16 - - nxv1i32 = 106, // n x 1 x i32 - nxv2i32 = 
107, // n x 2 x i32 - nxv4i32 = 108, // n x 4 x i32 - nxv8i32 = 109, // n x 8 x i32 - nxv16i32 = 110, // n x 16 x i32 - nxv32i32 = 111, // n x 32 x i32 - - nxv1i64 = 112, // n x 1 x i64 - nxv2i64 = 113, // n x 2 x i64 - nxv4i64 = 114, // n x 4 x i64 - nxv8i64 = 115, // n x 8 x i64 - nxv16i64 = 116, // n x 16 x i64 - nxv32i64 = 117, // n x 32 x i64 + LAST_FIXEDLEN_VECTOR_VALUETYPE = v16f64, + + nxv1i1 = 89, // n x 1 x i1 + nxv2i1 = 90, // n x 2 x i1 + nxv4i1 = 91, // n x 4 x i1 + nxv8i1 = 92, // n x 8 x i1 + nxv16i1 = 93, // n x 16 x i1 + nxv32i1 = 94, // n x 32 x i1 + + nxv1i8 = 95, // n x 1 x i8 + nxv2i8 = 96, // n x 2 x i8 + nxv4i8 = 97, // n x 4 x i8 + nxv8i8 = 98, // n x 8 x i8 + nxv16i8 = 99, // n x 16 x i8 + nxv32i8 = 100, // n x 32 x i8 + + nxv1i16 = 101, // n x 1 x i16 + nxv2i16 = 102, // n x 2 x i16 + nxv4i16 = 103, // n x 4 x i16 + nxv8i16 = 104, // n x 8 x i16 + nxv16i16 = 105, // n x 16 x i16 + nxv32i16 = 106, // n x 32 x i16 + + nxv1i32 = 107, // n x 1 x i32 + nxv2i32 = 108, // n x 2 x i32 + nxv4i32 = 109, // n x 4 x i32 + nxv8i32 = 110, // n x 8 x i32 + nxv16i32 = 111, // n x 16 x i32 + nxv32i32 = 112, // n x 32 x i32 + + nxv1i64 = 113, // n x 1 x i64 + nxv2i64 = 114, // n x 2 x i64 + nxv4i64 = 115, // n x 4 x i64 + nxv8i64 = 116, // n x 8 x i64 + nxv16i64 = 117, // n x 16 x i64 + nxv32i64 = 118, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv2f16 = 118, // n x 2 x f16 - nxv4f16 = 119, // n x 4 x f16 - nxv8f16 = 120, // n x 8 x f16 - nxv1f32 = 121, // n x 1 x f32 - nxv2f32 = 122, // n x 2 x f32 - nxv4f32 = 123, // n x 4 x f32 - nxv8f32 = 124, // n x 8 x f32 - nxv16f32 = 125, // n x 16 x f32 - nxv1f64 = 126, // n x 1 x f64 - nxv2f64 = 127, // n x 2 x f64 - nxv4f64 = 128, // n x 4 x f64 - nxv8f64 = 129, // n x 8 x f64 + nxv2f16 = 119, // n x 2 x f16 + nxv4f16 = 120, // n x 4 x f16 + nxv8f16 = 121, // n x 8 x f16 + nxv1f32 = 122, // n x 1 x f32 + nxv2f32 = 123, // n x 2 x f32 + 
nxv4f32 = 124, // n x 4 x f32 + nxv8f32 = 125, // n x 8 x f32 + nxv16f32 = 126, // n x 16 x f32 + nxv1f64 = 127, // n x 1 x f64 + nxv2f64 = 128, // n x 2 x f64 + nxv4f64 = 129, // n x 4 x f64 + nxv8f64 = 130, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -207,20 +208,20 @@ FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 130, // This is an X86 MMX value + x86mmx = 131, // This is an X86 MMX value - Glue = 131, // This glues nodes together during pre-RA sched + Glue = 132, // This glues nodes together during pre-RA sched - isVoid = 132, // This has no value + isVoid = 133, // This has no value - Untyped = 133, // This value takes a register, but has + Untyped = 134, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - exnref = 134, // WebAssembly's exnref type + exnref = 135, // WebAssembly's exnref type FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 135, // This always remains at the end of the list. + LAST_VALUETYPE = 136, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -374,7 +375,7 @@ bool is1024BitVector() const { return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 || SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 || - SimpleTy == MVT::v16i64); + SimpleTy == MVT::v16i64 || SimpleTy == MVT::v16f64); } /// Return true if this is a 2048-bit vector type. 
@@ -537,6 +538,7 @@ case v2f64: case v4f64: case v8f64: + case v16f64: case nxv1f64: case nxv2f64: case nxv4f64: @@ -589,6 +591,7 @@ case v16i64: case v16f16: case v16f32: + case v16f64: case nxv16i1: case nxv16i8: case nxv16i16: @@ -805,6 +808,7 @@ case v64i16: case v32i32: case v16i64: + case v16f64: case v32f32: return TypeSize::Fixed(1024); case nxv32i32: case nxv16i64: return TypeSize::Scalable(1024); @@ -1010,6 +1014,7 @@ if (NumElements == 2) return MVT::v2f64; if (NumElements == 4) return MVT::v4f64; if (NumElements == 8) return MVT::v8f64; + if (NumElements == 16) return MVT::v16f64; break; } return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -254,6 +254,7 @@ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); + case MVT::v16f64: return VectorType::get(Type::getDoubleTy(Context), 16); case MVT::nxv1i1: return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true); case MVT::nxv2i1: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -119,6 +119,12 @@ setOperationAction(ISD::LOAD, MVT::v8f64, Promote); AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32); + setOperationAction(ISD::LOAD, MVT::v16i64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32); + + setOperationAction(ISD::LOAD, MVT::v16f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32); + // There are no 64-bit extloads. These should be done as a 32-bit extload and // an extension to 64-bit. 
for (MVT VT : MVT::integer_valuetypes()) { @@ -177,11 +183,13 @@ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand); setOperationAction(ISD::STORE, MVT::f32, Promote); AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); @@ -231,6 +239,12 @@ setOperationAction(ISD::STORE, MVT::v8f64, Promote); AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32); + setOperationAction(ISD::STORE, MVT::v16i64, Promote); + AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32); + + setOperationAction(ISD::STORE, MVT::v16f64, Promote); + AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); @@ -263,6 +277,8 @@ setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); + setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand); + setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand); setOperationAction(ISD::Constant, MVT::i32, Legal); setOperationAction(ISD::Constant, MVT::i64, Legal); diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -372,12 +372,12 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 
for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -448,16 +448,16 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an 
estimated cost of 661 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r165 = fptosi <16 x double> undef to <16 
x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -528,16 +528,16 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for 
instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-LABEL: 'casts' diff --git 
a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -150,6 +150,7 @@ case MVT::v2f64: return "MVT::v2f64"; case MVT::v4f64: return "MVT::v4f64"; case MVT::v8f64: return "MVT::v8f64"; + case MVT::v16f64: return "MVT::v16f64"; case MVT::nxv1i1: return "MVT::nxv1i1"; case MVT::nxv2i1: return "MVT::nxv2i1"; case MVT::nxv4i1: return "MVT::nxv4i1";