Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -264,6 +264,8 @@
     /// transformation will select an unrolling factor based on the current cost
     /// threshold and other factors.
     unsigned Count;
+    /// Default unroll count for loops with run-time trip count.
+    unsigned DefaultUnrollRuntimeCount;
     // Set the maximum unrolling factor. The unrolling factor may be selected
     // using the appropriate cost threshold, but may not exceed this number
     // (set to UINT_MAX to disable). This does not apply in cases where the
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -50,6 +50,8 @@
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
 
+  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
   /// @}
 
   /// \name Vector TTI Implementations
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -238,6 +238,63 @@
   return TTI::PSK_Software;
 }
 
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
+                                             TTI::UnrollingPreferences &UP) {
+  // Find out if L contains a call (a direct call that is lowered to a real
+  // call, or any indirect call), and how many stores there are.
+  bool HasCall = false;
+  unsigned NumStores = 0;
+  for (auto &BB : L->blocks())
+    for (auto &I : *BB) {
+      if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
+        ImmutableCallSite CS(&I);
+        if (const Function *F = CS.getCalledFunction()) {
+          if (isLoweredToCall(F))
+            HasCall = true;
+          if (F->getIntrinsicID() == Intrinsic::memcpy ||
+              F->getIntrinsicID() == Intrinsic::memset)
+            NumStores++;
+        } else { // indirect call.
+          HasCall = true;
+        }
+      }
+      if (isa<StoreInst>(&I)) {
+        NumStores++;
+        Type *MemAccessTy = I.getOperand(0)->getType();
+        if ((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
+            (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
+          NumStores++; // 128 bit fp/int stores get split.
+      }
+    }
+
+  // The z13 processor will run out of store tags if too many stores
+  // are fed into it too quickly. Therefore make sure there are not
+  // too many stores in the resulting unrolled loop.
+  unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
+
+  if (HasCall) {
+    // Only allow full unrolling if loop has any calls.
+    UP.FullUnrollMaxCount = Max;
+    UP.MaxCount = 1;
+    return;
+  }
+
+  UP.MaxCount = Max;
+  if (UP.MaxCount <= 1)
+    return;
+
+  // Allow partial and runtime trip count unrolling.
+  UP.Partial = UP.Runtime = true;
+
+  UP.PartialThreshold = 75;
+  UP.DefaultUnrollRuntimeCount = 4;
+
+  // Allow expensive instructions in the pre-header of the loop.
+  UP.AllowExpensiveTripCount = true;
+
+  UP.Force = true;
+}
+
 unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
   if (!Vector)
     // Discount the stack pointer. Also leave out %r0, since it can't
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -102,10 +102,6 @@
 /// code expansion would result.
 static const unsigned NoThreshold = UINT_MAX;
 
-/// Default unroll count for loops with run-time trip count if
-/// -unroll-count is not set
-static const unsigned DefaultUnrollRuntimeCount = 8;
-
 /// Gather the various unrolling parameters based on the defaults, compiler
 /// flags, TTI overrides and user specified parameters.
 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
@@ -122,6 +118,7 @@
   UP.PartialThreshold = UP.Threshold;
   UP.PartialOptSizeThreshold = 0;
   UP.Count = 0;
+  UP.DefaultUnrollRuntimeCount = 8;
   UP.MaxCount = UINT_MAX;
   UP.FullUnrollMaxCount = UINT_MAX;
   UP.Partial = false;
@@ -803,7 +800,7 @@
       // largest power-of-two factor that satisfies the threshold limit.
       // As we'll create fixup loop, do the type of unrolling only if
       // remainder loop is allowed.
-      UP.Count = DefaultUnrollRuntimeCount;
+      UP.Count = UP.DefaultUnrollRuntimeCount;
       UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
       while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
         UP.Count >>= 1;
@@ -852,7 +849,7 @@
     return false;
   }
   if (UP.Count == 0)
-    UP.Count = DefaultUnrollRuntimeCount;
+    UP.Count = UP.DefaultUnrollRuntimeCount;
   UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
 
   // Reduce unroll count to be the largest power-of-two factor of