diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5010,6 +5010,14 @@
     Assert(Size % 16 == 0, "bswap must be an even number of bytes", &Call);
     break;
   }
+  case Intrinsic::invariant_start: {
+    ConstantInt *InvariantSize = dyn_cast<ConstantInt>(Call.getArgOperand(0));
+    Assert(InvariantSize &&
+               (!InvariantSize->isNegative() || InvariantSize->isMinusOne()),
+           "invariant_start parameter must be -1, 0 or a positive number",
+           &Call);
+    break;
+  }
   case Intrinsic::matrix_multiply:
   case Intrinsic::matrix_transpose:
   case Intrinsic::matrix_column_major_load:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -940,7 +940,19 @@
                                   Loop *CurLoop) {
   Value *Addr = LI->getOperand(0);
   const DataLayout &DL = LI->getModule()->getDataLayout();
-  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+
+  // It is not currently possible for clang to generate an invariant.start
+  // intrinsic with scalable vector types because we don't support thread local
+  // sizeless types and we don't permit sizeless types in structs or classes.
+  // Furthermore, even if support is added for this in future the intrinsic
+  // itself is defined to have a size of -1 for variable sized objects. This
+  // makes it impossible to verify if the intrinsic envelops our region of
+  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
+  // types would have a -1 parameter, but the former is clearly double the size
+  // of the latter.
+  if (LocSizeInBits.isScalable())
+    return false;
 
   // if the type is i8 addrspace(x)*, we know this is the type of
   // llvm.invariant.start operand
@@ -970,13 +982,17 @@
     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
         !II->use_empty())
       continue;
-    unsigned InvariantSizeInBits =
-        cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+    ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
+    // The intrinsic supports having a -1 argument for variable sized objects
+    // so we should check for that here.
+    if (InvariantSize->isNegative())
+      continue;
+    uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
     // Confirm the invariant.start location size contains the load operand size
     // in bits. Also, the invariant.start should dominate the load, and we
     // should not hoist the load out of a loop that contains this dominating
     // invariant.start.
-    if (LocSizeInBits <= InvariantSizeInBits &&
+    if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
       return true;
   }
diff --git a/llvm/test/Transforms/LICM/AArch64/lit.local.cfg b/llvm/test/Transforms/LICM/AArch64/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll b/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
@@ -0,0 +1,30 @@
+; RUN: opt -licm -mtriple aarch64-linux-gnu -mattr=+sve -S < %s | FileCheck %s
+
+define void @no_hoist_load1_nxv2i64(<vscale x 2 x i64>* %out, i8* %in8, i32 %n) {
+; CHECK-LABEL: @no_hoist_load1_nxv2i64(
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK: for.body:
+; CHECK: load
+entry:
+  %cmp0 = icmp ugt i32 %n, 0
+  %invst = call {}* @llvm.invariant.start.p0i8(i64 16, i8* %in8)
+  %in = bitcast i8* %in8 to <vscale x 2 x i64>*
+  br i1 %cmp0, label %for.body, label %for.end
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %i2 = zext i32 %i to i64
+  %ptr = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %out, i64 %i2
+  %val = load <vscale x 2 x i64>, <vscale x 2 x i64>* %in, align 16
+  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptr, align 16
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+
diff --git a/llvm/test/Transforms/LICM/hoisting.ll b/llvm/test/Transforms/LICM/hoisting.ll
--- a/llvm/test/Transforms/LICM/hoisting.ll
+++ b/llvm/test/Transforms/LICM/hoisting.ll
@@ -360,3 +360,36 @@
 loopexit:
   ret i32 %sum
 }
+
+; We can't hoist the invariant load out of the loop because
+; the marker is given a variable size (-1).
+define i32 @test_fence5(i8* %addr, i32 %n, i8* %volatile) {
+; CHECK-LABEL: @test_fence5
+; CHECK-LABEL: entry
+; CHECK: invariant.start
+; CHECK-NOT: %addrld = load atomic i32, i32* %addr.i unordered, align 8
+; CHECK: br label %loop
+entry:
+  %gep = getelementptr inbounds i8, i8* %addr, i64 8
+  %addr.i = bitcast i8* %gep to i32 *
+  store atomic i32 5, i32 * %addr.i unordered, align 8
+  fence release
+  %invst = call {}* @llvm.invariant.start.p0i8(i64 -1, i8* %gep)
+  br label %loop
+
+loop:
+  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
+  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
+  %volload = load atomic i8, i8* %volatile unordered, align 8
+  fence acquire
+  %volchk = icmp eq i8 %volload, 0
+  %addrld = load atomic i32, i32* %addr.i unordered, align 8
+  %sel = select i1 %volchk, i32 0, i32 %addrld
+  %sum.next = add i32 %sel, %sum
+  %indvar.next = add i32 %indvar, 1
+  %cond = icmp slt i32 %indvar.next, %n
+  br i1 %cond, label %loop, label %loopexit
+
+loopexit:
+  ret i32 %sum
+}