Index: llvm/trunk/lib/Support/Host.cpp =================================================================== --- llvm/trunk/lib/Support/Host.cpp +++ llvm/trunk/lib/Support/Host.cpp @@ -111,6 +111,7 @@ AMDATHLON, AMDFAM14H, AMDFAM16H, + AMDFAM17H, CPU_TYPE_MAX }; @@ -149,6 +150,7 @@ AMD_BTVER2, AMDFAM15H_BDVER3, AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, CPU_SUBTYPE_MAX }; @@ -742,6 +744,14 @@ } *Subtype = AMD_BTVER2; break; // "btver2" + case 23: + *Type = AMDFAM17H; + if (Features & (1 << FEATURE_ADX)) { + *Subtype = AMDFAM17H_ZNVER1; + break; // "znver1" + } + *Subtype = AMD_BTVER1; + break; default: break; // "generic" } @@ -950,6 +960,15 @@ default: return "amdfam16"; } + case AMDFAM17H: + switch (Subtype) { + case AMD_BTVER1: + return "btver1"; + case AMDFAM17H_ZNVER1: + return "znver1"; + default: + return "amdfam17"; + } default: return "generic"; } Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -760,6 +760,42 @@ FeatureMWAITX ]>; +// TODO: The scheduler model falls to BTVER2 model. +// The znver1 model has to be put in place. +// Zen +def: ProcessorModel<"znver1", BtVer2Model, [ + FeatureADX, + FeatureAES, + FeatureAVX2, + FeatureBMI, + FeatureBMI2, + FeatureCLFLUSHOPT, + FeatureCMPXCHG16B, + FeatureF16C, + FeatureFMA, + FeatureFSGSBase, + FeatureFXSR, + FeatureFastLZCNT, + FeatureLAHFSAHF, + FeatureLZCNT, + FeatureMMX, + FeatureMOVBE, + FeatureMWAITX, + FeaturePCLMUL, + FeaturePOPCNT, + FeaturePRFCHW, + FeatureRDRAND, + FeatureRDSEED, + FeatureSHA, + FeatureSMAP, + FeatureSSE4A, + FeatureSlowSHLD, + FeatureX87, + FeatureXSAVE, + FeatureXSAVEC, + FeatureXSAVEOPT, + FeatureXSAVES]>; + def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; Index: llvm/trunk/test/CodeGen/X86/cpus.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cpus.ll +++ llvm/trunk/test/CodeGen/X86/cpus.ll @@ -33,3 +33,4 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty Index: llvm/trunk/test/CodeGen/X86/lzcnt-zext-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ llvm/trunk/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -3,6 +3,8 @@ ; Eg: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt)) ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s ; Test one 32-bit input, output is 32-bit, no transformations expected. define i32 @test_zext_cmp0(i32 %a) { Index: llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll +++ llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll @@ -46,6 +46,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses Index: llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-var.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -17,6 +17,7 @@ ; RUN: llc < %s -march=x86-64 -mcpu=bdver2 | FileCheck %s ; RUN: llc < %s -march=x86-64 -mcpu=bdver3 | FileCheck %s ; RUN: llc < %s -march=x86-64 -mcpu=bdver4 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=znver1 | FileCheck %s ; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd'