diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -37,7 +37,8 @@ // Ideally for znver4, we should have 6.75K. However we don't add that // considerting the impact compile time and prefer using default values // instead. - // let LoopMicroOpBufferSize = 6750; + // Still, set a small value to influence unrolling, as we did for znver3. + let LoopMicroOpBufferSize = 512; // AMD SOG 19h, 2.6.2 L1 Data Cache // The L1 data cache has a 4- or 5- cycle integer load-to-use latency. // AMD SOG 19h, 2.12 L1 Data Cache diff --git a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll --- a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll @@ -1,5 +1,6 @@ ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s +; RUN: opt -passes=debugify,loop-unroll -mcpu=znver4 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"