Index: llvm/trunk/include/llvm/Support/X86TargetParser.def =================================================================== --- llvm/trunk/include/llvm/Support/X86TargetParser.def +++ llvm/trunk/include/llvm/Support/X86TargetParser.def @@ -98,6 +98,7 @@ X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake") X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client") X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server") +X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2") // Entries below this are not in libgcc/compiler-rt. X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65) X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45) Index: llvm/trunk/lib/Support/Host.cpp =================================================================== --- llvm/trunk/lib/Support/Host.cpp +++ llvm/trunk/lib/Support/Host.cpp @@ -916,7 +916,14 @@ break; // "btver2" case 23: *Type = X86::AMDFAM17H; - *Subtype = X86::AMDFAM17H_ZNVER1; + if (Model >= 0x30 && Model <= 0x3f) { + *Subtype = X86::AMDFAM17H_ZNVER2; + break; // "znver2"; 30h-3fh: Zen2 + } + if (Model <= 0x0f) { + *Subtype = X86::AMDFAM17H_ZNVER1; + break; // "znver1"; 00h-0Fh: Zen1 + } break; default: break; // "generic" Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -1143,8 +1143,8 @@ FeatureMacroFusion ]>; -// Znver1 -def: ProcessorModel<"znver1", Znver1Model, [ +// AMD Zen Processors common ISAs +def ZNFeatures : ProcessorFeatures<[], [ FeatureADX, FeatureAES, FeatureAVX2, @@ -1183,6 +1183,19 @@ FeatureXSAVEOPT, FeatureXSAVES]>; +class Znver1Proc : ProcModel; +def : Znver1Proc<"znver1">; + +class Znver2Proc : ProcModel; +def : Znver2Proc<"znver2">; + def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; Index: llvm/trunk/test/CodeGen/X86/cpus-amd.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cpus-amd.ll +++ llvm/trunk/test/CodeGen/X86/cpus-amd.ll @@ -26,6 +26,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void Index: llvm/trunk/test/CodeGen/X86/lzcnt-zext-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ llvm/trunk/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -5,6 +5,8 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; Test one 32-bit input, output is 32-bit, no transformations expected. define i32 @test_zext_cmp0(i32 %a) { Index: llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll +++ llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll @@ -47,6 +47,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses Index: llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-var.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -13,8 +13,9 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s -; Verify that for the X86_64 processors that are known to have poor latency +; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' ; instructions. @@ -25,7 +26,7 @@ define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone { entry: -; CHECK-NOT: shld +; CHECK-NOT: shld %sh_prom = zext i32 %c to i64 %shl = shl i64 %a, %sh_prom %sub = sub nsw i32 64, %c