diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1105,6 +1105,40 @@
   if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
       EnableFalkorHWPFUnrollFix)
     getFalkorUnrollingPreferences(L, SE, UP);
+
+  // Scan the loop: don't unroll loops with calls as this could prevent
+  // inlining. Don't unroll vector loops either, as they don't benefit much from
+  // unrolling.
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      // Don't unroll vectorised loops.
+      if (I.getType()->isVectorTy())
+        return;
+
+      // A call blocks unrolling unless getCalledFunction() yields a callee for
+      // which isLoweredToCall() is false; such a callee is skipped.
+      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+          if (!isLoweredToCall(F))
+            continue;
+        }
+        return;
+      }
+    }
+  }
+
+  // Enable runtime unrolling for in-order models.
+  // If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by
+  // checking for that case, we can ensure that the default behaviour is
+  // unchanged.
+  if (ST->getProcFamily() != AArch64Subtarget::Others &&
+      !ST->getSchedModel().isOutOfOrder()) {
+    UP.Runtime = true;
+    UP.Partial = true;
+    UP.UpperBound = true;
+    UP.UnrollRemainder = true;
+    UP.DefaultUnrollRuntimeCount = 4;
+  }
 }
 
 void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
 ; Tests for unrolling loops with run-time trip counts
 