Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -140,6 +140,26 @@ cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); +static cl::opt<int> DefaultCacheLevelAssociativity( + "polly-target-default-cache-level-associativity", + cl::desc("The default cache level associativity (if not enough were " + "provided by --polly-target-cache-level-associativity)."), + cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory)); + +static cl::opt<int> DefaultFirstCacheLevelSize( + "polly-target-first-cache-level-size", + cl::desc( + "The default size of the first cache level specified in bytes (if not " + "enough were provided by --polly-target-cache-level-sizes)."), + cl::Hidden, cl::ZeroOrMore, cl::init(32768), cl::cat(PollyCategory)); + +static cl::opt<int> DefaultSecondCacheLevelSize( + "polly-target-second-cache-level-size", + cl::desc( + "The default size of the second cache level specified in bytes (if " + "not enough were provided by --polly-target-cache-level-sizes)."), + cl::Hidden, cl::ZeroOrMore, cl::init(262144), cl::cat(PollyCategory)); + static cl::list<int> CacheLevelSizes( "polly-target-cache-level-sizes", cl::desc("The size of each cache level specified in bytes."), cl::Hidden, @@ -606,27 +626,34 @@ /// @see MicroKernelParamsTy static struct MacroKernelParamsTy getMacroKernelParams(const MicroKernelParamsTy &MicroKernelParams) { + // The quotient should be greater than zero. + if (!(MicroKernelParams.Mr > 0 && MicroKernelParams.Nr > 0 && + PollyPatternMatchingNcQuotient > 0)) + return {1, 1, 1}; + // According to www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf, // it requires information about the first two levels of a cache to determine // all the parameters of a macro-kernel. It also checks that an associativity - // degree of a cache level is greater than two. 
Otherwise, another algorithm - // for determination of the parameters should be used. - if (!(MicroKernelParams.Mr > 0 && MicroKernelParams.Nr > 0 && - CacheLevelSizes.size() >= 2 && CacheLevelAssociativity.size() >= 2 && - CacheLevelSizes[0] > 0 && CacheLevelSizes[1] > 0 && - CacheLevelAssociativity[0] > 2 && CacheLevelAssociativity[1] > 2)) - return {1, 1, 1}; - // The quotient should be greater than zero. - if (PollyPatternMatchingNcQuotient <= 0) - return {1, 1, 1}; + // degree of a cache level is greater than two. Otherwise, the default values + // are used. Since there are no typical values of these parameters, we use the + // parameters of Intel Core i7-3820 SandyBridge that also help to attain + // high performance on IBM POWER System S822 and IBM Power 730 Express server. + int Associativity[2] = {DefaultCacheLevelAssociativity, + DefaultCacheLevelAssociativity}; + int Sizes[2] = {DefaultFirstCacheLevelSize, DefaultSecondCacheLevelSize}; + for (unsigned i = 0; i < 2; i++) { + if (CacheLevelAssociativity.size() > i && CacheLevelAssociativity[i] > 2) + Associativity[i] = CacheLevelAssociativity[i]; + if (CacheLevelSizes.size() > i && CacheLevelSizes[i] > 0) + Sizes[i] = CacheLevelSizes[i]; + } + int Car = floor( - (CacheLevelAssociativity[0] - 1) / + (Associativity[0] - 1) / (1 + static_cast<double>(MicroKernelParams.Nr) / MicroKernelParams.Mr)); - int Kc = (Car * CacheLevelSizes[0]) / - (MicroKernelParams.Mr * CacheLevelAssociativity[0] * 8); - double Cac = static_cast<double>(Kc * 8 * CacheLevelAssociativity[1]) / - CacheLevelSizes[1]; - int Mc = floor((CacheLevelAssociativity[1] - 2) / Cac); + int Kc = (Car * Sizes[0]) / (MicroKernelParams.Mr * Associativity[0] * 8); + double Cac = static_cast<double>(Kc * 8 * Associativity[1]) / Sizes[1]; + int Mc = floor((Associativity[1] - 2) / Cac); int Nc = PollyPatternMatchingNcQuotient * MicroKernelParams.Nr; return {Mc, Nc, Kc}; } Index: test/ScheduleOptimizer/pattern-matching-based-opts_3.ll 
=================================================================== --- test/ScheduleOptimizer/pattern-matching-based-opts_3.ll +++ test/ScheduleOptimizer/pattern-matching-based-opts_3.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast < %s 2>&1 | FileCheck %s +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast -polly-pattern-matching-nc-quotient=0 < %s 2>&1 | FileCheck %s ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 < %s 2>&1 | FileCheck %s --check-prefix=EXTRACTION-OF-MACRO-KERNEL ; ; /* C := alpha*A*B + beta*C */