Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -1951,15 +1951,15 @@
       Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
 
   Use *U = S.getUse();
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser());
 
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
     if (MI->isVolatile())
       return false;
     if (!S.isSplittable())
       return false; // Skip any unsplittable intrinsics.
-  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
-    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-        II->getIntrinsicID() != Intrinsic::lifetime_end)
+  } else if (II && II->getIntrinsicID() != Intrinsic::lifetime_start &&
+             II->getIntrinsicID() != Intrinsic::lifetime_end) {
       return false;
   } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
     // Disable vector promotion when there are loads or stores of an FCA.
Index: test/Transforms/SROA/sroa-lifetime-instrinsics.ll
===================================================================
--- /dev/null
+++ test/Transforms/SROA/sroa-lifetime-instrinsics.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -sroa < %s | FileCheck %s
+; For lifetime start/end intrinsic users, SROA must evaluate all remaining
+; conditions before deciding whether to slice vector loads or not.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+; Function Attrs: nounwind readonly
+define i32 @foo(i8* %n) {
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: %a.sroa.0.0.vec.insert = insertelement <8 x i16> undef, i16 %conv, i32 0
+; CHECK: store i16 %conv, i16* %a.sroa.0.0.arrayidx1.sroa_idx3, align 16
+; CHECK-NEXT: %a.sroa.0.0.a.sroa.0.0. = load <8 x i16>* %a.sroa.0
+
+  %n.addr = alloca i8*, align 8
+  %a = alloca [32 x i16], align 2
+  %c = alloca <4 x i32>, align 16
+  %__ret = alloca <8 x i16>, align 16
+  %tmp = alloca <8 x i16>, align 16
+  %cleanup.dest.slot = alloca i32
+  store i8* %n, i8** %n.addr, align 8
+  %0 = bitcast [32 x i16]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %0) #1
+  %1 = load i8** %n.addr, align 8
+  %arrayidx = getelementptr inbounds i8* %1, i64 0
+  %2 = load i8* %arrayidx, align 1
+  %conv = sext i8 %2 to i16
+  %arrayidx1 = getelementptr inbounds [32 x i16]* %a, i32 0, i64 0
+  store i16 %conv, i16* %arrayidx1, align 2
+  %arraydecay = getelementptr inbounds [32 x i16]* %a, i32 0, i32 0
+  %3 = bitcast i16* %arraydecay to i8*
+  %4 = bitcast i8* %3 to <8 x i16>*
+  %5 = load <8 x i16>* %4
+  store <8 x i16> %5, <8 x i16>* %__ret, align 16
+  %6 = load <8 x i16>* %__ret, align 16
+  store <8 x i16> %6, <8 x i16>* %tmp
+  %7 = load <8 x i16>* %tmp
+  %8 = bitcast <8 x i16> %7 to <4 x i32>
+  store <4 x i32> %8, <4 x i32>* %c, align 16
+  %9 = load <4 x i32>* %c, align 16
+  %vecext = extractelement <4 x i32> %9, i32 0
+  store i32 1, i32* %cleanup.dest.slot
+  %10 = bitcast [32 x i16]* %a to i8*
+  call void @llvm.lifetime.end(i64 64, i8* %10) #1
+  ret i32 %vecext
+}
Index: test/Transforms/SROA/vector-lifetime-intrinsic.ll
===================================================================
--- test/Transforms/SROA/vector-lifetime-intrinsic.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: opt -sroa -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64"
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #0
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #0
-
-; CHECK: @wombat
-; CHECK-NOT: alloca
-; CHECK: ret void
-define void @wombat(<4 x float> %arg1) {
-bb:
-  %tmp = alloca <4 x float>, align 16
-  %tmp8 = bitcast <4 x float>* %tmp to i8*
-  call void @llvm.lifetime.start(i64 16, i8* %tmp8)
-  store <4 x float> %arg1, <4 x float>* %tmp, align 16
-  %tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
-  %tmp18 = load <3 x float>* %tmp17
-  %tmp20 = bitcast <4 x float>* %tmp to i8*
-  call void @llvm.lifetime.end(i64 16, i8* %tmp20)
-  call void @wombat3(<3 x float> %tmp18)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @wombat3(<3 x float>) #0
-
-attributes #0 = { nounwind }