diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -160,6 +160,8 @@ X86 Support ^^^^^^^^^^^ +- Support ISA of ``AVX10.1``. + Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4939,6 +4939,10 @@ def mno_sse4a : Flag<["-"], "mno-sse4a">, Group; def mavx : Flag<["-"], "mavx">, Group; def mno_avx : Flag<["-"], "mno-avx">, Group; +def mavx10_1 : Flag<["-"], "mavx10.1">, Group; +def mno_avx10_1 : Flag<["-"], "mno-avx10.1">, Group; +def mavx10_1_256 : Flag<["-"], "mavx10.1-256">, Group; +def mavx10_1_512 : Flag<["-"], "mavx10.1-512">, Group; def mavx2 : Flag<["-"], "mavx2">, Group; def mno_avx2 : Flag<["-"], "mno-avx2">, Group; def mavx512f : Flag<["-"], "mavx512f">, Group; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -95,6 +95,8 @@ bool HasLWP = false; bool HasFMA = false; bool HasF16C = false; + bool HasAVX10_1 = false; + bool HasAVX10_512BIT = false; bool HasAVX512CD = false; bool HasAVX512VPOPCNTDQ = false; bool HasAVX512VNNI = false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -228,6 +228,10 @@ HasF16C = true; } else if (Feature == "+gfni") { HasGFNI = true; + } else if (Feature == "+avx10.1") { + HasAVX10_1 = true; + } else if (Feature == "+avx10-512bit") { + HasAVX10_512BIT = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { @@ -729,6 +733,11 @@ if (HasGFNI) Builder.defineMacro("__GFNI__"); + if (HasAVX10_1) + Builder.defineMacro("__AVX10_1__"); + if (HasAVX10_512BIT) + Builder.defineMacro("__AVX10_512BIT__"); + if 
(HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) @@ -952,6 +961,8 @@ .Case("amx-int8", true) .Case("amx-tile", true) .Case("avx", true) + .Case("avx10-512bit", true) + .Case("avx10.1", true) .Case("avx2", true) .Case("avx512f", true) .Case("avx512cd", true) @@ -1058,6 +1069,8 @@ .Case("amx-int8", HasAMXINT8) .Case("amx-tile", HasAMXTILE) .Case("avx", SSELevel >= AVX) + .Case("avx10-512bit", HasAVX10_512BIT) + .Case("avx10.1", HasAVX10_1) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) @@ -1529,7 +1542,11 @@ return Size <= 64; case 'z': // XMM0/YMM/ZMM0 - if (hasFeatureEnabled(FeatureMap, "avx512f")) + if (hasFeatureEnabled(FeatureMap, "avx10.1") && + !hasFeatureEnabled(FeatureMap, "avx10-512bit")) + // ZMM0 cannot be used if target only supports AVX10.x. + return Size <= 256U; + else if (hasFeatureEnabled(FeatureMap, "avx512f")) // ZMM0 can be used if target supports AVX512F. return Size <= 512U; else if (hasFeatureEnabled(FeatureMap, "avx")) @@ -1549,7 +1566,11 @@ break; case 'v': case 'x': - if (hasFeatureEnabled(FeatureMap, "avx512f")) + if (hasFeatureEnabled(FeatureMap, "avx10.1") && + !hasFeatureEnabled(FeatureMap, "avx10-512bit")) + // 512-bit zmm registers cannot be used if target only supports AVX10.x. + return Size <= 256U; + else if (hasFeatureEnabled(FeatureMap, "avx512f")) // 512-bit zmm registers can be used if target supports AVX512F. return Size <= 512U; else if (hasFeatureEnabled(FeatureMap, "avx")) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5414,8 +5414,11 @@ // can move this up to the beginning of the function. 
checkTargetFeatures(E, FD); - if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) + if (unsigned VectorWidth = + getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) { + checkTargetVectorWidth(E, FD, VectorWidth); LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); + } // See if we have a target specific intrinsic. StringRef Name = getContext().BuiltinInfo.getName(BuiltinID); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4067,6 +4067,8 @@ void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl); void checkTargetFeatures(SourceLocation Loc, const FunctionDecl *TargetDecl); + void checkTargetVectorWidth(const CallExpr *E, const FunctionDecl *TargetDecl, + unsigned VectorWidth); llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name = ""); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2573,6 +2573,20 @@ return checkTargetFeatures(E->getBeginLoc(), TargetDecl); } +// Emits an error if the builtin's vector width >= 512 and avx10-512bit +// feature is not set. +void CodeGenFunction::checkTargetVectorWidth(const CallExpr *E, + const FunctionDecl *TargetDecl, + unsigned VectorWidth) { + if (!getTarget().getTriple().isX86() || VectorWidth < 512) + return; + llvm::StringMap FeatureMap; + CGM.getContext().getFunctionFeatureMap(FeatureMap, TargetDecl); + if (FeatureMap.lookup("avx10.1") && !FeatureMap.lookup("avx10-512bit")) + CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature) + << TargetDecl->getDeclName() << "avx10-512bit"; +} + // Emits an error if we don't have a valid set of target features for the // called function. 
void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -1486,6 +1486,24 @@ } } +static bool checkAVX10ParamFeature(DiagnosticsEngine &Diag, + SourceLocation CallLoc, + const llvm::StringMap &CallerMap, + const llvm::StringMap &CalleeMap, + QualType Ty, bool IsArgument) { + bool CallerAVX256 = + CallerMap.lookup("avx10.1") && !CallerMap.lookup("avx10-512bit"); + bool CalleeAVX256 = + CalleeMap.lookup("avx10.1") && !CalleeMap.lookup("avx10-512bit"); + + // Forbid 512-bit or larger vector pass or return on AVX10 256-bit targets. + if (CallerAVX256 || CalleeAVX256) + return Diag.Report(CallLoc, diag::err_avx_calling_convention) + << IsArgument << Ty << "avx10.x-256"; + + return false; +} + static bool checkAVXParamFeature(DiagnosticsEngine &Diag, SourceLocation CallLoc, const llvm::StringMap &CallerMap, @@ -1515,7 +1533,9 @@ bool IsArgument) { uint64_t Size = Ctx.getTypeSize(Ty); if (Size > 256) - return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, + return checkAVX10ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, + IsArgument) || + checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx512f", IsArgument); if (Size > 128) diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -231,6 +231,9 @@ // Now add any that the user explicitly requested on the command line, // which may override the defaults. 
+ bool HasAVX10x = false; + int AVXVecSize = 0; + std::vector AVX512Cand; for (const Arg *A : Args.filtered(options::OPT_m_x86_Features_Group, options::OPT_mgeneral_regs_only)) { StringRef Name = A->getOption().getName(); @@ -249,7 +252,44 @@ bool IsNegative = Name.startswith("no-"); if (IsNegative) Name = Name.substr(3); - Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name)); + if (Name.startswith("avx10.")) { + HasAVX10x = true; + StringRef VecSizeStr; + std::tie(Name, VecSizeStr) = Name.split('-'); + if (VecSizeStr == "512") { + if (AVXVecSize == 256) + D.Diag(diag::warn_drv_overriding_flag_option) << "AVX10-256" + << "AVX10-512"; + AVXVecSize = 512; + } else if (VecSizeStr == "256") { + if (AVXVecSize == 512) + D.Diag(diag::warn_drv_overriding_flag_option) << "AVX10-512" + << "AVX10-256"; + AVXVecSize = 256; + } else if (VecSizeStr != "") { + D.Diag(diag::err_drv_unsupported_opt_with_suggestion) + << A->getOption().getName() << Name; + } + } + StringRef ArgString = Args.MakeArgString((IsNegative ? "-" : "+") + Name); + if (Name.startswith("avx512")) + AVX512Cand.push_back(ArgString); + else + Features.push_back(ArgString); + } + + // If -mavx10.x is specified, clear all -m[no-]avx512xxx options and emit a + // warning. + if (HasAVX10x) { + if (AVX512Cand.size()) + D.Diag(diag::warn_drv_overriding_flag_option) << "avx512*" + << "avx10.*"; + if (AVXVecSize == 256) + Features.push_back("-avx10-512bit"); + if (AVXVecSize == 512) + Features.push_back("+avx10-512bit"); + } else { + Features.insert(Features.end(), AVX512Cand.begin(), AVX512Cand.end()); } // Enable/disable straight line speculation hardening. 
diff --git a/clang/test/CodeGen/X86/avx10-error.c b/clang/test/CodeGen/X86/avx10-error.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/avx10-error.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1 -emit-llvm -verify + +#include + +__m512d test_mm512_sqrt_pd(__m512d a) +{ + // CHECK-LABEL: @test_mm512_sqrt_pd + return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-error {{'__builtin_ia32_sqrtpd512' needs target feature avx10-512bit}} +} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -54,9 +54,9 @@ // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" 
"target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx" diff --git a/clang/test/CodeGen/target-avx-abi-diag.c b/clang/test/CodeGen/target-avx-abi-diag.c --- a/clang/test/CodeGen/target-avx-abi-diag.c +++ 
b/clang/test/CodeGen/target-avx-abi-diag.c @@ -1,6 +1,8 @@ // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -verify=no256,no512 -o - -S // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx -verify=no512 -o - -S // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -verify=both -o - -S +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1 -DAVX10_256 -verify=avx10-256 -o - -S +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1 -target-feature +avx10-512bit -verify=both -o - -S // REQUIRES: x86-registered-target // both-no-diagnostics @@ -16,6 +18,7 @@ void variadic(int i, ...); __attribute__((target("avx512f"))) void variadic_err(int i, ...); +#ifndef AVX10_256 // If neither side has an attribute, warn. void call_warn(void) { avx256Type t1; @@ -27,15 +30,19 @@ variadic(1, t1); // no256-warning {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}} variadic(3, t2); // no512-warning {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}} } +#endif // If only 1 side has an attribute, error. 
void call_errors(void) { avx256Type t1; takesAvx256(t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}} avx512fType t2; + + // avx10-256-error@+1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx10.x-256' enabled changes the ABI}} takesAvx512(t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}} variadic_err(1, t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}} + // avx10-256-error@+1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx10.x-256' enabled changes the ABI}} variadic_err(3, t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}} } diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -369,6 +369,25 @@ // AVXVNNIINT16: "-target-feature" "+avxvnniint16" // NO-AVXVNNIINT16: "-target-feature" "-avxvnniint16" +// RUN: %clang --target=i386 -mavx10.1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_1 %s +// RUN: %clang --target=i386 -mavx10.1 -mavx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_1,AVX10_WARN %s +// RUN: %clang --target=i386 -mavx10.1 -mno-avx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_1,AVX10_WARN %s +// RUN: %clang --target=i386 -mno-avx10.1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX10_1 %s +// RUN: %clang --target=i386 -mno-avx10.1 -mavx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=NO-AVX10_1,AVX10_WARN %s +// RUN: %clang --target=i386 -mno-avx10.1 -mno-avx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=NO-AVX10_1,AVX10_WARN %s +// AVX10_WARN: clang: warning: 
overriding 'avx512*' option with 'avx10.*' [-Woverriding-t-option] +// AVX10_1: "-target-feature" "+avx10.1" +// NO-AVX10_1: "-target-feature" "-avx10.1" + +// RUN: %clang --target=i386 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_512BIT %s +// RUN: %clang --target=i386 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX10_512BIT %s +// RUN: %clang --target=i386 -mavx10.1-256 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_512BIT,OVER256_WARN %s +// RUN: %clang --target=i386 -mavx10.1-512 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=NO-AVX10_512BIT,OVER512_WARN %s +// OVER256_WARN: clang: warning: overriding 'AVX10-256' option with 'AVX10-512' [-Woverriding-t-option] +// OVER512_WARN: clang: warning: overriding 'AVX10-512' option with 'AVX10-256' [-Woverriding-t-option] +// AVX10_512BIT: "-target-feature" "+avx10-512bit" +// NO-AVX10_512BIT: "-target-feature" "-avx10-512bit" + // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -714,6 +714,18 @@ // AVXVNNIINT16NOAVX2-NOT: #define __AVX2__ 1 // AVXVNNIINT16NOAVX2-NOT: #define __AVXVNNIINT16__ 1 +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1 %s +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1 -mno-avx512f -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1 %s + +// AVX10_1: #define __AVX10_1__ 1 +// AVX10_1: #define __AVX512F__ 1 + +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-avx10.1 -x c -E -dM -o - %s | FileCheck 
-check-prefix=NOAVX10_1 %s +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-avx10.1 -mavx512f -x c -E -dM -o - %s | FileCheck -check-prefix=NOAVX10_1 %s + +// NOAVX10_1-NOT: #define __AVX10_1__ 1 +// NOAVX10_1-NOT: #define __AVX512F__ 1 + // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s // CRC32: #define __CRC32__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -104,6 +104,8 @@ Changes to the X86 Backend -------------------------- +* Support ISA of ``AVX10.1``. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -235,6 +235,8 @@ X86_FEATURE (SM3, "sm3") X86_FEATURE (SM4, "sm4") X86_FEATURE (AVXVNNIINT16, "avxvnniint16") +X86_FEATURE (AVX10_1, "avx10.1") +X86_FEATURE (AVX10_512BIT, "avx10-512bit") // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2031,6 +2031,17 @@ "' does not apply to function return values", V); + unsigned MaxParameterWidth = 0; + auto GetMaxParameterWidth = [&MaxParameterWidth](Type *Ty) { + if (Ty->isVectorTy()) { + if (auto *VT = dyn_cast(Ty)) { + unsigned Size = VT->getPrimitiveSizeInBits().getFixedValue(); + if (Size > MaxParameterWidth) + MaxParameterWidth = Size; + } + } + }; + GetMaxParameterWidth(FT->getReturnType()); verifyParameterAttrs(RetAttrs, FT->getReturnType(), V); // Verify parameter attributes. 
@@ -2049,6 +2060,7 @@ } verifyParameterAttrs(ArgAttrs, Ty, V); + GetMaxParameterWidth(Ty); if (ArgAttrs.hasAttribute(Attribute::Nest)) { Check(!SawNest, "More than one parameter has attribute nest!", V); @@ -2204,6 +2216,16 @@ CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V); } + // Check AVX10 512-bit feature. + if (MaxParameterWidth >= 512 && Attrs.hasFnAttr("target-features")) { + Triple T(M.getTargetTriple()); + if (T.isX86()) { + StringRef TF = Attrs.getFnAttr("target-features").getValueAsString(); + Check(!TF.contains("+avx10.1") || TF.contains("+avx10-512bit"), + "512-bit vector arguments require 'avx10-512bit' for AVX10", V); + } + } + checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-prefix", V); checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-entry", V); checkUnsignedBaseTenFuncAttr(Attrs, "warn-stack-size", V); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -285,6 +285,7 @@ SmallVectorImpl &CB) const; PrefixKind emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, + const MCSubtargetInfo &STI, SmallVectorImpl &CB) const; void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI, @@ -841,7 +842,7 @@ // REX prefix is optional, but if used must be immediately before the opcode // Encoding type for this instruction. return (TSFlags & X86II::EncodingMask) - ? emitVEXOpcodePrefix(MemoryOperand, MI, CB) + ? emitVEXOpcodePrefix(MemoryOperand, MI, STI, CB) : emitOpcodePrefix(MemoryOperand, MI, STI, CB); } @@ -860,6 +861,7 @@ /// \returns the used prefix. 
PrefixKind X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, + const MCSubtargetInfo &STI, SmallVectorImpl &CB) const { const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -919,6 +921,9 @@ Prefix.setL(TSFlags & X86II::VEX_L); Prefix.setL2(TSFlags & X86II::EVEX_L2); + if ((TSFlags & X86II::EVEX_L2) && STI.hasFeature(X86::FeatureAVX10_1) && + !STI.hasFeature(X86::FeatureAVX10_512bit)) + report_fatal_error("ZMM registers are not supported without AVX10-512BIT"); switch (TSFlags & X86II::OpPrefixMask) { case X86II::PD: Prefix.setPP(0x1); // 66 diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -187,6 +187,13 @@ def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true", "Support 16-bit floating point", [FeatureBWI, FeatureVLX, FeatureDQI]>; +def FeatureAVX10_1 : SubtargetFeature<"avx10.1", "HasAVX10_1", "true", + "Enable AVX10.1 instructions", + [FeatureFP16, FeatureCDI, FeatureBF16, + FeatureBITALG, FeatureIFMA, FeatureVNNI, + FeatureVPOPCNTDQ, FeatureVBMI, FeatureVBMI2]>; +def FeatureAVX10_512bit : SubtargetFeature<"avx10-512bit", "HasAVX10_512BIT", "true", + "Enable AVX10 512-bit Instructions">; def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true", "Enable AVX-VNNI-INT8", diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -902,6 +902,8 @@ def NoAVX : Predicate<"!Subtarget->hasAVX()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; +def HasAVX10_1 : Predicate<"Subtarget->hasAVX10_1()">; +def HasAVX10_512BIT : Predicate<"Subtarget->hasAVX10_512BIT()">; def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; def HasAVX512 : Predicate<"Subtarget->hasAVX512()">; def UseAVX : 
Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -1030,7 +1030,14 @@ bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); - if (RC.getID() != X86::TILERegClassID) + unsigned ID = RC.getID(); + const X86Subtarget &Subtarget = MF.getSubtarget(); + if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) && + Subtarget.hasAVX10_1() && !Subtarget.hasAVX10_512BIT()) + report_fatal_error( + "64-bit mask registers are not supported without AVX10-512BIT"); + + if (ID != X86::TILERegClassID) return BaseImplRetVal; ShapeT VirtShape = getTileShape(VirtReg, const_cast(VRM), MRI); diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -263,6 +263,9 @@ // If there are no 512-bit vectors and we prefer not to use 512-bit registers, // disable them in the legalizer. 
bool useAVX512Regs() const { + if (hasAVX10_1()) + return hasAVX10_512BIT() && + (getPreferVectorWidth() >= 512 || RequiredVectorWidth > 256); return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256); } diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1791,6 +1791,7 @@ Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); + Features["avx10.1"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1); bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); @@ -1809,6 +1810,10 @@ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); + bool HasLeaf24 = + MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); + Features["avx10-512bit"] = HasLeaf24 && ((EBX >> 18) & 1); + return true; } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -678,6 +678,12 @@ // AVXVNNI Features constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2; +constexpr FeatureBitset ImpliedFeaturesAVX10_1 = + FeatureAVX512FP16 | FeatureAVX512CD | FeatureAVX512BF16 | + FeatureAVX512BITALG | FeatureAVX512IFMA | FeatureAVX512VNNI | + FeatureAVX512VPOPCNTDQ | FeatureAVX512VBMI | FeatureAVX512VBMI2; +constexpr FeatureBitset ImpliedFeaturesAVX10_512BIT = {}; + constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = { #define X86_FEATURE(ENUM, STR) {{STR}, ImpliedFeatures##ENUM}, #include "llvm/TargetParser/X86TargetParser.def" diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll 
b/llvm/test/CodeGen/X86/avx512-arith.ll --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1,+avx10-512bit | FileCheck %s --check-prefix=CHECK --check-prefix=SKX define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { ; CHECK-LABEL: addpd512: diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll --- a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.1,+avx10-512bit | FileCheck %s --check-prefixes=AVX512BW ; PR34666 define <64 x i8> @add_v64i8_broadcasts(<64 x i8> %a0, i64 %a1, i8 %a2) { diff --git a/llvm/test/CodeGen/X86/avx512bw-arith.ll b/llvm/test/CodeGen/X86/avx512bw-arith.ll --- a/llvm/test/CodeGen/X86/avx512bw-arith.ll +++ b/llvm/test/CodeGen/X86/avx512bw-arith.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1,+avx10-512bit | FileCheck %s define <64 x i8> @vpaddb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone { ; 
CHECK-LABEL: vpaddb512_test: diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll --- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1 | FileCheck %s ; 256-bit diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512fp16 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx10.1,+avx10-512bit | FileCheck %s define <32 x half> @vaddph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone { ; CHECK-LABEL: vaddph_512_test: diff --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll --- a/llvm/test/CodeGen/X86/avx512vl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx10.1 --show-mc-encoding| FileCheck %s ; 256-bit