diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1983,6 +1983,17 @@
   uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
   uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
 
+  // Lifetime intrinsics operate over the whole alloca, so their slices are
+  // usually larger than the other load/store slices (RelEnd > Size). But
+  // lifetime markers are always promotable and should not affect the
+  // promotability of the partition's other slices.
+  if (auto *II = dyn_cast<IntrinsicInst>(S.getUse()->getUser())) {
+    Intrinsic::ID IntriID = II->getIntrinsicID();
+    if (IntriID == Intrinsic::lifetime_start ||
+        IntriID == Intrinsic::lifetime_end)
+      return true;
+  }
+
   // We can't reasonably handle cases where the load or store extends past
   // the end of the alloca's type and into its padding.
   if (RelEnd > Size)
diff --git a/llvm/test/Transforms/SROA/lifetime-intrinsic.ll b/llvm/test/Transforms/SROA/lifetime-intrinsic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SROA/lifetime-intrinsic.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
+
+; Lifetime markers that cover the whole alloca must not keep SROA from
+; promoting its two i32 halves: the checks below expect phis in the loop and
+; truncs for the narrower i16 loads.
+
+%i32x2 = type { [2 x i32] }
+
+declare { i32, i32 } @bar(i32, i32) #2
+
+define i16 @foo(i32* nocapture readonly %loop) #0 {
+entry:
+  %arr = alloca %i32x2, align 4
+  %p8 = bitcast %i32x2* %arr to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %p8)
+  %z0 = getelementptr inbounds %i32x2, %i32x2* %arr, i64 0, i32 0, i32 0
+  store i32 0, i32* %z0, align 4
+  %z1 = getelementptr inbounds %i32x2, %i32x2* %arr, i64 0, i32 0, i32 1
+  store i32 0, i32* %z1, align 4
+  br label %while_cond
+
+; CHECK-LABEL: while_cond:
+; CHECK: [[ARR_SROA_6_0:%.*]] = phi i32 [ 0, %entry ], [ %res1, %while_body ]
+; CHECK: [[ARR_SROA_0_0:%.*]] = phi i32 [ 0, %entry ], [ %res0, %while_body ]
+while_cond:
+  %loopi = load i32, i32* %loop, align 4
+  %loopb = icmp eq i32 %loopi, 0
+  br i1 %loopb, label %while_end, label %while_body
+
+; CHECK-LABEL: while_body:
+; CHECK-NEXT: %x = call { i32, i32 } @bar(i32 [[ARR_SROA_0_0]], i32 [[ARR_SROA_6_0]]) #0
+; CHECK-NEXT: %res0 = extractvalue { i32, i32 } %x, 0
+; CHECK-NEXT: %res1 = extractvalue { i32, i32 } %x, 1
+; CHECK-NEXT: br label %while_cond
+while_body:
+  %p0 = getelementptr inbounds %i32x2, %i32x2* %arr, i64 0, i32 0, i32 0
+  %x0 = load i32, i32* %p0, align 4
+  %p1 = getelementptr inbounds %i32x2, %i32x2* %arr, i64 0, i32 0, i32 1
+  %x1 = load i32, i32* %p1, align 4
+  %x = call { i32, i32 } @bar(i32 %x0, i32 %x1) #2
+  %res0 = extractvalue { i32, i32 } %x, 0
+  %res1 = extractvalue { i32, i32 } %x, 1
+  store i32 %res0, i32* %p0, align 4
+  store i32 %res1, i32* %p1, align 4
+  br label %while_cond
+
+; CHECK-LABEL: while_end:
+; CHECK: [[ARR_SROA_0_0_EXT_TRUNC:%.*]] = trunc i32 [[ARR_SROA_0_0]] to i16
+; CHECK: [[ARR_SROA_6_4_EXT_TRUNC:%.*]] = trunc i32 [[ARR_SROA_6_0]] to i16
+; CHECK: %ret = add i16 [[ARR_SROA_0_0_EXT_TRUNC]], [[ARR_SROA_6_4_EXT_TRUNC]]
+while_end:
+  %p0_i16 = bitcast %i32x2* %arr to i16*
+  %s0 = load i16, i16* %p0_i16, align 4
+  %p1_ = getelementptr inbounds %i32x2, %i32x2* %arr, i64 0, i32 0, i32 1
+  %p1_i16 = bitcast i32* %p1_ to i16*
+  %s1 = load i16, i16* %p1_i16, align 4
+  %ret = add i16 %s0, %s1
+  %p8_ = bitcast %i32x2* %arr to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %p8_)
+  ret i16 %ret
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+attributes #0 = { alwaysinline nounwind }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }