diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -1105,6 +1106,36 @@
   if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
       EnableFalkorHWPFUnrollFix)
     getFalkorUnrollingPreferences(L, SE, UP);
+
+  // Scan the loop: don't unroll loops with calls as this could prevent
+  // inlining.
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      // Don't unroll vectorised loop.
+      if (I.getType()->isVectorTy())
+        return;
+
+      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+          // Calls that lower to an instruction sequence (intrinsics etc.)
+          // don't block unrolling.
+          if (!isLoweredToCall(F))
+            continue;
+        }
+        return;
+      }
+    }
+  }
+
+  // Force runtime unrolling for in-order models.
+  // If mcpu is omitted, then an in-order model will be used, meaning this
+  // affects the "default" compilation configuration for AArch64.
+  // TODO: This might be beneficial for out-of-order models too.
+  if (!ST->getSchedModel().isOutOfOrder()) {
+    UP.Runtime = true;
+    UP.Partial = true;
+    UP.UpperBound = true;
+    UP.UnrollRemainder = true;
+    UP.DefaultUnrollRuntimeCount = 4;
+  }
 }
 
 void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
 ; Tests for unrolling loops with run-time trip counts
 
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
@@ -104,7 +104,7 @@
 for.cond:                                         ; preds = %for.inc, %entry
   %1 = load i32, i32* %i, align 4
   %cmp = icmp ult i32 %1, 20000
-  br i1 %cmp, label %for.body, label %for.cond.cleanup
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
 
 for.cond.cleanup:                                 ; preds = %for.cond
   %2 = bitcast i32* %i to i8*
@@ -138,3 +138,6 @@
 
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}
\ No newline at end of file