Index: include/llvm/Support/X86TargetParser.def =================================================================== --- include/llvm/Support/X86TargetParser.def +++ include/llvm/Support/X86TargetParser.def @@ -90,6 +90,7 @@ X86_CPU_SUBTYPE_COMPAT("bdver3", AMDFAM15H_BDVER3, "bdver3") X86_CPU_SUBTYPE_COMPAT("bdver4", AMDFAM15H_BDVER4, "bdver4") X86_CPU_SUBTYPE_COMPAT("znver1", AMDFAM17H_ZNVER1, "znver1") +X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2") X86_CPU_SUBTYPE_COMPAT("ivybridge", INTEL_COREI7_IVYBRIDGE, "ivybridge") X86_CPU_SUBTYPE_COMPAT("haswell", INTEL_COREI7_HASWELL, "haswell") X86_CPU_SUBTYPE_COMPAT("broadwell", INTEL_COREI7_BROADWELL, "broadwell") Index: lib/Support/Host.cpp =================================================================== --- lib/Support/Host.cpp +++ lib/Support/Host.cpp @@ -916,7 +916,14 @@ break; // "btver2" case 23: *Type = X86::AMDFAM17H; - *Subtype = X86::AMDFAM17H_ZNVER1; + if (Model >= 0x30 && Model <= 0x3f) { + *Subtype = X86::AMDFAM17H_ZNVER2; + break; // "znver2"; 30h-3fh: Zen2 + } + if (Model <= 0x0f) { + *Subtype = X86::AMDFAM17H_ZNVER1; + break; // "znver1"; 00h-0Fh: Zen1 + } break; default: break; // "generic" Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -1144,15 +1144,14 @@ FeatureMacroFusion ]>; -// Znver1 -def: ProcessorModel<"znver1", Znver1Model, [ +// AMD Zen Processors common ISAs +def ZNFeatures : ProcessorFeatures<[], [ FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureCLFLUSHOPT, - FeatureCLZERO, FeatureCMOV, Feature64Bit, FeatureCMPXCHG16B, @@ -1184,6 +1183,21 @@ FeatureXSAVEOPT, FeatureXSAVES]>; +class Znver1Proc : ProcModel; +def : Znver1Proc<"znver1">; + +class Znver2Proc : ProcModel; +def : Znver2Proc<"znver2">; + def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; Index: test/CodeGen/X86/cpus-amd.ll =================================================================== --- test/CodeGen/X86/cpus-amd.ll +++ test/CodeGen/X86/cpus-amd.ll @@ -26,6 +26,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void Index: test/CodeGen/X86/lzcnt-zext-cmp.ll =================================================================== --- test/CodeGen/X86/lzcnt-zext-cmp.ll +++ test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -5,6 +5,8 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; Test one 32-bit input, output is 32-bit, no transformations expected. define i32 @test_zext_cmp0(i32 %a) { Index: test/CodeGen/X86/slow-unaligned-mem.ll =================================================================== --- test/CodeGen/X86/slow-unaligned-mem.ll +++ test/CodeGen/X86/slow-unaligned-mem.ll @@ -47,6 +47,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses Index: test/CodeGen/X86/x86-64-double-shifts-var.ll =================================================================== --- test/CodeGen/X86/x86-64-double-shifts-var.ll +++ test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -13,8 +13,9 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s -; Verify that for the X86_64 processors that are known to have poor latency +; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' ; instructions. @@ -25,7 +26,7 @@ define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone { entry: -; CHECK-NOT: shld +; CHECK-NOT: shld %sh_prom = zext i32 %c to i64 %shl = shl i64 %a, %sh_prom %sub = sub nsw i32 64, %c