Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -3033,7 +3033,23 @@
     ConstantInt *Size =
         ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
                          NewEndOffset - NewBeginOffset);
-    Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+    // For lifetime intrinsics it is fine to address the new slice through a
+    // generic i8* pointer. Reusing the pointer type of the whole original
+    // alloca instead can trigger an assertion in PromoteMemToReg.
+    // For example, the original alloca
+    //
+    //   alloca [10 x float]
+    //
+    // is split into several allocas, one of which is just
+    //
+    //   alloca float
+    //
+    // and that slice would then be bitcast to [10 x float]*. PromoteMemToReg
+    // asserts when it encounters such a bitcast.
+    Type *PointerTy =
+        IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+    Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
+
     Value *New;
     if (II.getIntrinsicID() == Intrinsic::lifetime_start)
       New = IRB.CreateLifetimeStart(Ptr, Size);
Index: test/Transforms/SROA/extra-cast-in-lifetime.ll
===================================================================
--- /dev/null
+++ test/Transforms/SROA/extra-cast-in-lifetime.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -S -sroa | FileCheck %s
+
+; The lifetime markers below take the [10 x float]* alloca pointer directly.
+; SROA must rewrite them without asserting and leave no array alloca behind.
+
+@MainKernel.callA = dso_local addrspace(3) global [10 x float] undef, align 4
+
+define void @MainKernel(i32 %iNumSteps, i32 %tid) {
+; CHECK-LABEL: @MainKernel(
+; CHECK-NOT: alloca [10 x float]
+; CHECK: ret void
+
+entry:
+  %rA = alloca [10 x float], align 4
+  call void @llvm.lifetime.start.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %rA)
+
+  %cmp1 = icmp sgt i32 %iNumSteps, 0
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %tobool = icmp eq i32 %tid, 0
+  %conv.i = zext i32 %tid to i64
+  %0 = bitcast [10 x float]* %rA to i32*
+  %1 = load i32, i32* %0, align 4
+  %arrayidx5 = getelementptr inbounds [10 x float], [10 x float] addrspace(3)* @MainKernel.callA, i64 0, i64 %conv.i
+  %2 = bitcast float addrspace(3)* %arrayidx5 to i32 addrspace(3)*
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ %iNumSteps, %for.body.lr.ph ], [ %sub, %for.inc ]
+  br i1 %tobool, label %for.body.for.inc_crit_edge, label %if.then
+
+for.body.for.inc_crit_edge:                       ; preds = %for.body
+  br label %for.inc
+
+if.then:                                          ; preds = %for.body
+  store i32 %1, i32 addrspace(3)* %2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.for.inc_crit_edge, %if.then
+  %sub = add i32 %i.02, -1
+  %cmp = icmp sgt i32 %sub, 0
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  call void @llvm.lifetime.end.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %rA)
+
+  ret void
+}
+
+declare void @llvm.lifetime.start.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
+declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture)