Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -1951,15 +1951,15 @@
       Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
 
   Use *U = S.getUse();
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser());
 
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
     if (MI->isVolatile())
       return false;
     if (!S.isSplittable())
       return false; // Skip any unsplittable intrinsics.
-  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
-    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-        II->getIntrinsicID() != Intrinsic::lifetime_end)
+  } else if (II && II->getIntrinsicID() != Intrinsic::lifetime_start &&
+        II->getIntrinsicID() != Intrinsic::lifetime_end) {
       return false;
   } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
     // Disable vector promotion when there are loads or stores of an FCA.
Index: test/Transforms/ScalarRepl/sroa-lifetime-instrinsics.ll
===================================================================
--- /dev/null
+++ test/Transforms/ScalarRepl/sroa-lifetime-instrinsics.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -sroa < %s | FileCheck %s
+; When an alloca is used by lifetime.start/lifetime.end intrinsics, evaluate
+; all remaining conditions before deciding whether to slice vector loads.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+; Function Attrs: nounwind readonly
+define i32 @foo(i8* %n) { ; SROA test: [32 x i16] alloca %a with lifetime markers
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: %a.sroa.0.0.vec.insert = insertelement <8 x i16> undef, i16 %conv, i32 0
+; CHECK: store i16 %conv, i16* %a.sroa.0.0.arrayidx1.sroa_idx3, align 16
+; CHECK-NEXT: %a.sroa.0.0.a.sroa.0.0. = load <8 x i16>* %a.sroa.0
+; Expect the i16 store to stay a plain store, not become a vector insertelement.
+  %n.addr = alloca i8*, align 8
+  %a = alloca [32 x i16], align 2 ; 64-byte array bracketed by the lifetime calls below
+  %c = alloca <4 x i32>, align 16
+  %__ret = alloca <8 x i16>, align 16
+  %tmp = alloca <8 x i16>, align 16
+  %cleanup.dest.slot = alloca i32
+  store i8* %n, i8** %n.addr, align 8
+  %0 = bitcast [32 x i16]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %0) #1 ; lifetime of %a (64 bytes) begins
+  %1 = load i8** %n.addr, align 8
+  %arrayidx = getelementptr inbounds i8* %1, i64 0
+  %2 = load i8* %arrayidx, align 1
+  %conv = sext i8 %2 to i16
+  %arrayidx1 = getelementptr inbounds [32 x i16]* %a, i32 0, i64 0
+  store i16 %conv, i16* %arrayidx1, align 2 ; scalar store into element 0 of %a
+  %arraydecay = getelementptr inbounds [32 x i16]* %a, i32 0, i32 0
+  %3 = bitcast i16* %arraydecay to i8*
+  %4 = bitcast i8* %3 to <8 x i16>*
+  %5 = load <8 x i16>* %4 ; vector load covering the first 8 elements of %a
+  store <8 x i16> %5, <8 x i16>* %__ret, align 16
+  %6 = load <8 x i16>* %__ret, align 16
+  store <8 x i16> %6, <8 x i16>* %tmp
+  %7 = load <8 x i16>* %tmp
+  %8 = bitcast <8 x i16> %7 to <4 x i32>
+  store <4 x i32> %8, <4 x i32>* %c, align 16
+  %9 = load <4 x i32>* %c, align 16
+  %vecext = extractelement <4 x i32> %9, i32 0 ; lane 0 is the value returned
+  store i32 1, i32* %cleanup.dest.slot
+  %10 = bitcast [32 x i16]* %a to i8*
+  call void @llvm.lifetime.end(i64 64, i8* %10) #1 ; lifetime of %a ends
+  ret i32 %vecext
+}