Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -1951,15 +1951,15 @@ Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8); Use *U = S.getUse(); + IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser()); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; if (!S.isSplittable()) return false; // Skip any unsplittable intrinsics. - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) + } else if (II && II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) { return false; } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. Index: test/Transforms/ScalarRepl/sroa-lifetime-instrinsics.ll =================================================================== --- /dev/null +++ test/Transforms/ScalarRepl/sroa-lifetime-instrinsics.ll @@ -0,0 +1,53 @@ +; RUN: opt -S -sroa < %s | FileCheck %s +; For lifetime start and end intrinsics, evaluate all the remaining +; conditions before deciding whether to slice vector loads or not. + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) + +; Function Attrs: nounwind readonly +define i32 @foo(i8* %n) { +entry: +; CHECK-LABEL: @foo +; CHECK-NOT: %a.sroa.0.0.vec.insert = insertelement <8 x i16> undef, i16 %conv, i32 0 +; CHECK: store i16 %conv, i16* %a.sroa.0.0.arrayidx1.sroa_idx3, align 16 +; CHECK-NEXT: %a.sroa.0.0.a.sroa.0.0. 
= load <8 x i16>* %a.sroa.0 + + %n.addr = alloca i8*, align 8 + %a = alloca [32 x i16], align 2 + %c = alloca <4 x i32>, align 16 + %__ret = alloca <8 x i16>, align 16 + %tmp = alloca <8 x i16>, align 16 + %cleanup.dest.slot = alloca i32 + store i8* %n, i8** %n.addr, align 8 + %0 = bitcast [32 x i16]* %a to i8* + call void @llvm.lifetime.start(i64 64, i8* %0) #1 + %1 = load i8** %n.addr, align 8 + %arrayidx = getelementptr inbounds i8* %1, i64 0 + %2 = load i8* %arrayidx, align 1 + %conv = sext i8 %2 to i16 + %arrayidx1 = getelementptr inbounds [32 x i16]* %a, i32 0, i64 0 + store i16 %conv, i16* %arrayidx1, align 2 + %arraydecay = getelementptr inbounds [32 x i16]* %a, i32 0, i32 0 + %3 = bitcast i16* %arraydecay to i8* + %4 = bitcast i8* %3 to <8 x i16>* + %5 = load <8 x i16>* %4 + store <8 x i16> %5, <8 x i16>* %__ret, align 16 + %6 = load <8 x i16>* %__ret, align 16 + store <8 x i16> %6, <8 x i16>* %tmp + %7 = load <8 x i16>* %tmp + %8 = bitcast <8 x i16> %7 to <4 x i32> + store <4 x i32> %8, <4 x i32>* %c, align 16 + %9 = load <4 x i32>* %c, align 16 + %vecext = extractelement <4 x i32> %9, i32 0 + store i32 1, i32* %cleanup.dest.slot + %10 = bitcast [32 x i16]* %a to i8* + call void @llvm.lifetime.end(i64 64, i8* %10) #1 + ret i32 %vecext +}