diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -94,6 +94,18 @@
   Type *CastElTy = PTy->getElementType();
   if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
 
+  // This optimisation does not work for cases where the cast type
+  // is scalable and the allocated type is not. This is because we need to
+  // know how many times the cast type fits into the allocated type.
+  // For the opposite case, where the allocated type is scalable and the
+  // cast type is not, this leads to poor code quality due to the
+  // introduction of 'vscale' into the calculations. It seems better to
+  // bail out for this case too until we've done a proper cost-benefit
+  // analysis.
+  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
+  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
+  if (AllocIsScalable != CastIsScalable) return nullptr;
+
   Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
   Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
   if (CastElTyAlign < AllocElTyAlign) return nullptr;
@@ -103,14 +115,15 @@
   // same, we open the door to infinite loops of various kinds.
   if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
 
-  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
-  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
+  // The alloc and cast types should be either both fixed or both scalable.
+  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
+  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
   if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
 
   // If the allocation has multiple uses, only promote it if we're not
   // shrinking the amount of memory being allocated.
-  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
-  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
+  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
+  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
   if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
 
   // See if we can satisfy the modulus by pulling a scale out of the array
@@ -125,6 +138,9 @@
   if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
       (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;
 
+  // We don't currently support arrays of scalable types.
+  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
+
   unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
   Value *Amt = nullptr;
   if (Scale == 1) {
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-cast-of-alloc.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-cast-of-alloc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-cast-of-alloc.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @fixed_array16i32_to_scalable4i32(<vscale x 4 x i32>* %out) {
+; CHECK-LABEL: @fixed_array16i32_to_scalable4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i32], align 16
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast [16 x i32]* [[TMP]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store volatile <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[CAST]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* [[CAST]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[RELOAD]], <vscale x 4 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca [16 x i32], align 16
+  %cast = bitcast [16 x i32]* %tmp to <vscale x 4 x i32>*
+  store volatile <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %cast, align 16
+  %reload = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %cast, align 16
+  store <vscale x 4 x i32> %reload, <vscale x 4 x i32>* %out, align 16
+  ret void
+}
+
+define void @scalable4i32_to_fixed16i32(<16 x i32>* %out) {
+; CHECK-LABEL: @scalable4i32_to_fixed16i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 4 x i32>, align 64
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <vscale x 4 x i32>* [[TMP]] to <16 x i32>*
+; CHECK-NEXT:    store <16 x i32> zeroinitializer, <16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <16 x i32>, <16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    store <16 x i32> [[RELOAD]], <16 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 4 x i32>, align 16
+  %cast = bitcast <vscale x 4 x i32>* %tmp to <16 x i32>*
+  store <16 x i32> zeroinitializer, <16 x i32>* %cast, align 16
+  %reload = load volatile <16 x i32>, <16 x i32>* %cast, align 16
+  store <16 x i32> %reload, <16 x i32>* %out, align 16
+  ret void
+}
+
+define void @fixed16i32_to_scalable4i32(<vscale x 4 x i32>* %out) {
+; CHECK-LABEL: @fixed16i32_to_scalable4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <16 x i32>, align 16
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <16 x i32>* [[TMP]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store volatile <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[CAST]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* [[CAST]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[RELOAD]], <vscale x 4 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <16 x i32>, align 16
+  %cast = bitcast <16 x i32>* %tmp to <vscale x 4 x i32>*
+  store volatile <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %cast, align 16
+  %reload = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %cast, align 16
+  store <vscale x 4 x i32> %reload, <vscale x 4 x i32>* %out, align 16
+  ret void
+}
+
+define void @scalable16i32_to_fixed16i32(<16 x i32>* %out) {
+; CHECK-LABEL: @scalable16i32_to_fixed16i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 16 x i32>, align 64
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <vscale x 16 x i32>* [[TMP]] to <16 x i32>*
+; CHECK-NEXT:    store volatile <16 x i32> zeroinitializer, <16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <16 x i32>, <16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    store <16 x i32> [[RELOAD]], <16 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 16 x i32>, align 16
+  %cast = bitcast <vscale x 16 x i32>* %tmp to <16 x i32>*
+  store volatile <16 x i32> zeroinitializer, <16 x i32>* %cast, align 16
+  %reload = load volatile <16 x i32>, <16 x i32>* %cast, align 16
+  store <16 x i32> %reload, <16 x i32>* %out, align 16
+  ret void
+}
+
+define void @scalable32i32_to_scalable16i32(<vscale x 16 x i32>* %out) {
+; CHECK-LABEL: @scalable32i32_to_scalable16i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 32 x i32>, align 64
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <vscale x 32 x i32>* [[TMP]] to <vscale x 16 x i32>*
+; CHECK-NEXT:    store volatile <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 16 x i32>, <vscale x 16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD]], <vscale x 16 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 32 x i32>, align 16
+  %cast = bitcast <vscale x 32 x i32>* %tmp to <vscale x 16 x i32>*
+  store volatile <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32>* %cast, align 16
+  %reload = load volatile <vscale x 16 x i32>, <vscale x 16 x i32>* %cast, align 16
+  store <vscale x 16 x i32> %reload, <vscale x 16 x i32>* %out, align 16
+  ret void
+}
+
+define void @scalable32i16_to_scalable16i32(<vscale x 16 x i32>* %out) {
+; CHECK-LABEL: @scalable32i16_to_scalable16i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 16 x i32>, align 64
+; CHECK-NEXT:    store volatile <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32>* [[TMP]], align 64
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 16 x i32>, <vscale x 16 x i32>* [[TMP]], align 64
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD]], <vscale x 16 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 32 x i16>, align 16
+  %cast = bitcast <vscale x 32 x i16>* %tmp to <vscale x 16 x i32>*
+  store volatile <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32>* %cast, align 16
+  %reload = load volatile <vscale x 16 x i32>, <vscale x 16 x i32>* %cast, align 16
+  store <vscale x 16 x i32> %reload, <vscale x 16 x i32>* %out, align 16
+  ret void
+}
+
+define void @scalable32i16_to_scalable16i32_multiuse(<vscale x 16 x i32>* %out, <vscale x 32 x i16>* %out2) {
+; CHECK-LABEL: @scalable32i16_to_scalable16i32_multiuse(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 32 x i16>, align 64
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <vscale x 32 x i16>* [[TMP]] to <vscale x 16 x i32>*
+; CHECK-NEXT:    store volatile <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 16 x i32>, <vscale x 16 x i32>* [[CAST]], align 64
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD]], <vscale x 16 x i32>* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RELOAD2:%.*]] = load volatile <vscale x 32 x i16>, <vscale x 32 x i16>* [[TMP]], align 64
+; CHECK-NEXT:    store <vscale x 32 x i16> [[RELOAD2]], <vscale x 32 x i16>* [[OUT2:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 32 x i16>, align 16
+  %cast = bitcast <vscale x 32 x i16>* %tmp to <vscale x 16 x i32>*
+  store volatile <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32>* %cast, align 16
+  %reload = load volatile <vscale x 16 x i32>, <vscale x 16 x i32>* %cast, align 16
+  store <vscale x 16 x i32> %reload, <vscale x 16 x i32>* %out, align 16
+  %reload2 = load volatile <vscale x 32 x i16>, <vscale x 32 x i16>* %tmp, align 16
+  store <vscale x 32 x i16> %reload2, <vscale x 32 x i16>* %out2, align 16
+  ret void
+}