diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -1105,6 +1106,36 @@
   if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
       EnableFalkorHWPFUnrollFix)
     getFalkorUnrollingPreferences(L, SE, UP);
+
+  // Scan the loop: don't unroll loops with calls as this could prevent
+  // inlining.
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      // Don't unroll vectorised loop.
+      if (I.getType()->isVectorTy())
+        return;
+
+      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+          // Calls that lower to an instruction sequence (intrinsics etc.)
+          // don't block unrolling.
+          if (!isLoweredToCall(F))
+            continue;
+        }
+        return;
+      }
+    }
+  }
+
+  // Force runtime unrolling for in-order models.
+  // If mcpu is omitted, then an in-order model will be used, meaning this
+  // affects the "default" compilation configuration for AArch64.
+  // TODO: This might be beneficial for out-of-order models too.
+  if (!ST->getSchedModel().isOutOfOrder()) {
+    UP.Runtime = true;
+    UP.Partial = true;
+    UP.UpperBound = true;
+    UP.UnrollRemainder = true;
+    UP.DefaultUnrollRuntimeCount = 4;
+  }
 }
 
 void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
 ; Tests for unrolling loops with run-time trip counts
 
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
@@ -104,7 +104,7 @@
 for.cond:                                         ; preds = %for.inc, %entry
   %1 = load i32, i32* %i, align 4
   %cmp = icmp ult i32 %1, 20000
-  br i1 %cmp, label %for.body, label %for.cond.cleanup
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
 
 for.cond.cleanup:                                 ; preds = %for.cond
   %2 = bitcast i32* %i to i8*
@@ -138,3 +138,6 @@
 
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}
\ No newline at end of file