diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -314,7 +314,7 @@
     // Allow 32-bit only CPUs regardless of 64-bit mode unlike isValidCPUName.
     // NOTE: gcc rejects 32-bit mtune CPUs in 64-bit mode. But being lenient
     // since mtune was ignored by clang for so long.
-    return llvm::X86::parseArchX86(Name) != llvm::X86::CK_None;
+    return llvm::X86::parseTuneCPU(Name) != llvm::X86::CK_None;
   }
 
   void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -506,6 +506,9 @@
   case CK_K8:
   case CK_K8SSE3:
   case CK_x86_64:
+  case CK_x86_64_v2:
+  case CK_x86_64_v3:
+  case CK_x86_64_v4:
     defineCPUMacros(Builder, "k8");
     break;
   case CK_AMDFAM10:
@@ -1307,6 +1310,9 @@
   case CK_ZNVER2:
   // Deprecated
   case CK_x86_64:
+  case CK_x86_64_v2:
+  case CK_x86_64_v3:
+  case CK_x86_64_v4:
   case CK_Yonah:
   case CK_Penryn:
   case CK_Core2:
@@ -1451,7 +1457,7 @@
 }
 
 void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const {
-  llvm::X86::fillValidCPUArchList(Values);
+  llvm::X86::fillValidTuneCPUList(Values);
 }
 
 ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const {
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -175,3 +175,12 @@
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver2 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=znver2
 // znver2: "-target-cpu" "znver2"
+
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
+// x86-64: "-target-cpu" "x86-64"
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v2 2>&1 | FileCheck %s --check-prefix=x86-64-v2
+// x86-64-v2: "-target-cpu" "x86-64-v2"
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v3 2>&1 | FileCheck %s --check-prefix=x86-64-v3
+// x86-64-v3: "-target-cpu" "x86-64-v3"
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v4 2>&1 | FileCheck %s --check-prefix=x86-64-v4
+// x86-64-v4: "-target-cpu" "x86-64-v4"
diff --git a/clang/test/Driver/x86-mtune.c b/clang/test/Driver/x86-mtune.c
--- a/clang/test/Driver/x86-mtune.c
+++ b/clang/test/Driver/x86-mtune.c
@@ -40,3 +40,8 @@
 // RUN:   | FileCheck %s -check-prefix=marchmtune
 // marchmtune: "-target-cpu" "core2"
 // mmarchmtune: "-tune-cpu" "nehalem"
+
+// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v2 2>&1 | FileCheck %s --check-prefix=INVALID
+// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v3 2>&1 | FileCheck %s --check-prefix=INVALID
+// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v4 2>&1 | FileCheck %s --check-prefix=INVALID
+// INVALID: error: unknown target CPU '{{.*}}'
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -25,7 +25,7 @@
 // X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
 // X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
 // X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
-// X86-SAME: x86-64, geode
+// X86-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode
 
 // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
 // X86_64: error: unknown target CPU 'not-a-cpu'
@@ -35,7 +35,8 @@
 // X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake,
 // X86_64-SAME: icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3,
 // X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1,
-// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64
+// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
+// X86_64-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
 
 // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
 // TUNE_X86: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Preprocessor/predefined-arch-macros-x86.c b/clang/test/Preprocessor/predefined-arch-macros-x86.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Preprocessor/predefined-arch-macros-x86.c
@@ -0,0 +1,49 @@
+// RUN: %clang -target x86_64 -march=x86-64 -E -dM %s > %tv1
+// RUN: FileCheck %s --check-prefix=X86_64_V1 < %tv1
+
+// X86_64_V1: #define __MMX__ 1
+// X86_64_V1: #define __SSE2_MATH__ 1
+// X86_64_V1: #define __SSE2__ 1
+// X86_64_V1: #define __SSE_MATH__ 1
+// X86_64_V1: #define __SSE__ 1
+// X86_64_V1: #define __amd64 1
+// X86_64_V1: #define __amd64__ 1
+// X86_64_V1: #define __k8 1
+// X86_64_V1: #define __k8__ 1
+// X86_64_V1: #define __x86_64 1
+// X86_64_V1: #define __x86_64__ 1
+
+// RUN: %clang -target x86_64 -march=x86-64-v2 -E -dM %s > %tv2
+// RUN: diff %tv1 %tv2 > %t.txt || true
+// RUN: FileCheck %s --check-prefix=X86_64_V2 < %t.txt
+
+/// TODO: __LAHF_SAHF__
+// X86_64_V2: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1
+// X86_64_V2: #define __POPCNT__ 1
+// X86_64_V2: #define __SSE3__ 1
+// X86_64_V2-NEXT: #define __SSE4_1__ 1
+// X86_64_V2-NEXT: #define __SSE4_2__ 1
+
+// RUN: %clang -target x86_64 -march=x86-64-v3 -E -dM %s > %tv3
+// RUN: diff %tv2 %tv3 > %t.txt || true
+// RUN: FileCheck %s --check-prefix=X86_64_V3 < %t.txt
+
+// X86_64_V3: #define __AVX2__ 1
+// X86_64_V3: #define __BMI2__ 1
+// X86_64_V3: #define __BMI__ 1
+// X86_64_V3: #define __F16C__ 1
+// X86_64_V3: #define __FMA__ 1
+// X86_64_V3: #define __LZCNT__ 1
+// X86_64_V3: #define __MOVBE__ 1
+// X86_64_V3: #define __XSAVE__ 1
+
+// RUN: %clang -target x86_64 -march=x86-64-v4 -E -dM %s > %tv4
+// RUN: diff %tv3 %tv4 > %t.txt || true
+// RUN: FileCheck %s --check-prefix=X86_64_V4 < %t.txt
+
+// X86_64_V4: #define __AVX512BW__ 1
+// X86_64_V4-NEXT: #define __AVX512CD__ 1
+// X86_64_V4-NEXT: #define __AVX512DQ__ 1
+// X86_64_V4-NEXT: #define __AVX512F__ 1
+// X86_64_V4-NEXT: #define __AVX512VL__ 1
+// X86_64_V4-NOT: #define __AVX512{{.*}}
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -2258,21 +2258,6 @@
 // CHECK_X86_64_M32: #define __k8__ 1
 // CHECK_X86_64_M32: #define i386 1
 
-// RUN: %clang -march=x86-64 -m64 -E -dM %s -o - 2>&1 \
-// RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_X86_64_M64
-// CHECK_X86_64_M64: #define __MMX__ 1
-// CHECK_X86_64_M64: #define __SSE2_MATH__ 1
-// CHECK_X86_64_M64: #define __SSE2__ 1
-// CHECK_X86_64_M64: #define __SSE_MATH__ 1
-// CHECK_X86_64_M64: #define __SSE__ 1
-// CHECK_X86_64_M64: #define __amd64 1
-// CHECK_X86_64_M64: #define __amd64__ 1
-// CHECK_X86_64_M64: #define __k8 1
-// CHECK_X86_64_M64: #define __k8__ 1
-// CHECK_X86_64_M64: #define __x86_64 1
-// CHECK_X86_64_M64: #define __x86_64__ 1
-
 // RUN: %clang -march=k8 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M32
diff --git a/clang/test/Sema/builtin-cpu-supports.c b/clang/test/Sema/builtin-cpu-supports.c
--- a/clang/test/Sema/builtin-cpu-supports.c
+++ b/clang/test/Sema/builtin-cpu-supports.c
@@ -15,6 +15,11 @@
 
   if (__builtin_cpu_is("int")) // expected-error {{invalid cpu name for builtin}}
     a("intel");
+
+  (void)__builtin_cpu_is("x86-64"); // expected-error {{invalid cpu name for builtin}}
+  (void)__builtin_cpu_is("x86-64-v2"); // expected-error {{invalid cpu name for builtin}}
+  (void)__builtin_cpu_is("x86-64-v3"); // expected-error {{invalid cpu name for builtin}}
+  (void)__builtin_cpu_is("x86-64-v4"); // expected-error {{invalid cpu name for builtin}}
 #else
   if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}}
     a("vsx");
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -121,17 +121,26 @@
   CK_ZNVER1,
   CK_ZNVER2,
   CK_x86_64,
+  CK_x86_64_v2,
+  CK_x86_64_v3,
+  CK_x86_64_v4,
   CK_Geode,
 };
 
 /// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
 /// \p Only64Bit is true.
 CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false);
+/// Like parseArchX86, but returns CK_None for names that are not accepted as
+/// a tuning target (the x86-64-v[234] micro-architecture levels).
+CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false);
 
 /// Provide a list of valid CPU names. If \p Only64Bit is true, the list will
 /// only contain 64-bit capable CPUs.
 void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
                           bool Only64Bit = false);
+/// Provide a list of valid -mtune names.
+void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+                          bool Only64Bit = false);
 
 /// Get the key feature prioritizing target multiversioning.
 ProcessorFeatures getKeyFeature(CPUKind Kind);
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -137,6 +137,16 @@
 
 // Basic 64-bit capable CPU.
 constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
+// x86-64 micro-architecture levels v2-v4; each level extends the previous one.
+constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
+                                            FeaturePOPCNT | FeatureSSE4_2 |
+                                            FeatureCMPXCHG16B;
+constexpr FeatureBitset FeaturesX86_64_V3 =
+    FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
+    FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+                                            FeatureAVX512BW | FeatureAVX512CD |
+                                            FeatureAVX512DQ | FeatureAVX512VL;
 
 // Intel Core CPUs
 constexpr FeatureBitset FeaturesCore2 =
@@ -383,10 +393,17 @@
   { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
   // Generic 64-bit processor.
   { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
+  { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
+  { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3 },
+  { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4 },
   // Geode processors.
   { {"geode"}, CK_Geode, ~0U, FeaturesGeode },
 };
 
+// Processor names that parseArchX86 accepts but parseTuneCPU and
+// fillValidTuneCPUList reject.
+constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
+
 X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
   for (const auto &P : Processors)
     if (P.Name == CPU && (P.Features[FEATURE_64BIT] || !Only64Bit))
@@ -395,6 +412,12 @@
   return CK_None;
 }
 
+X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) {
+  if (llvm::is_contained(NoTuneList, CPU))
+    return CK_None;
+  return parseArchX86(CPU, Only64Bit);
+}
+
 void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
                                      bool Only64Bit) {
   for (const auto &P : Processors)
@@ -402,6 +425,14 @@
       Values.emplace_back(P.Name);
 }
 
+void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+                                     bool Only64Bit) {
+  for (const ProcInfo &P : Processors)
+    if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit) &&
+        !llvm::is_contained(NoTuneList, P.Name))
+      Values.emplace_back(P.Name);
+}
+
 ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) {
   // FIXME: Can we avoid a linear search here? The table might be sorted by
   // CPUKind so we could binary search?
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -558,18 +558,27 @@
 //===----------------------------------------------------------------------===//
 
 def ProcessorFeatures {
+  // x86-64 and x86-64-v[234]
+  list<SubtargetFeature> X86_64V1Features = [
+    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
+    FeatureFXSR, FeatureNOPL, Feature64Bit
+  ];
+  list<SubtargetFeature> X86_64V2Features = !listconcat(
+      X86_64V1Features,
+      [FeatureCMPXCHG16B, FeatureSSE42, FeaturePOPCNT, FeatureLAHFSAHF]);
+  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
+    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
+    FeatureMOVBE, FeatureXSAVE
+  ]);
+  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+    FeatureBWI,
+    FeatureCDI,
+    FeatureDQI,
+    FeatureVLX,
+  ]);
+
   // Nehalem
-  list<SubtargetFeature> NHMFeatures = [FeatureX87,
-                                        FeatureCMPXCHG8B,
-                                        FeatureCMOV,
-                                        FeatureMMX,
-                                        FeatureSSE42,
-                                        FeatureFXSR,
-                                        FeatureNOPL,
-                                        Feature64Bit,
-                                        FeatureCMPXCHG16B,
-                                        FeaturePOPCNT,
-                                        FeatureLAHFSAHF];
+  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
   list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
                                       FeatureInsertVZEROUPPER];
 
@@ -1350,16 +1359,7 @@
 // covers a huge swath of x86 processors. If there are specific scheduling
 // knobs which need to be tuned differently for AMD chips, we might consider
 // forming a common base for them.
-def : ProcModel<"x86-64", SandyBridgeModel, [
-  FeatureX87,
-  FeatureCMPXCHG8B,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSE2,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-],
+def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
 [
   FeatureSlow3OpsLEA,
   FeatureSlowDivide64,
@@ -1368,6 +1368,16 @@
   FeatureInsertVZEROUPPER
 ]>;
 
+// x86-64 micro-architecture levels.
+def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
+                ProcessorFeatures.SNBTuning>;
+// Close to Haswell.
+def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
+                ProcessorFeatures.HSWTuning>;
+// Close to AVX-512 level implemented by Xeon Scalable Processors.
+def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features,
+                ProcessorFeatures.SKXTuning>;
+
 //===----------------------------------------------------------------------===//
 // Calling Conventions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/cpus-other.ll b/llvm/test/CodeGen/X86/cpus-other.ll
--- a/llvm/test/CodeGen/X86/cpus-other.ll
+++ b/llvm/test/CodeGen/X86/cpus-other.ll
@@ -16,6 +16,11 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
+;; x86-64 micro-architecture levels.
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v2
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v3
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v4
+
 define void @foo() {
   ret void
 }