Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -2547,7 +2547,9 @@
     bool EmitMemCpy =
         !VecTy && !IntTy &&
         (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
-         !NewAI.getAllocatedType()->isSingleValueType());
+         NewAllocaEndOffset - NewAllocaBeginOffset <
+             DL.getTypeStoreSize(NewAllocaTy) ||
+         !NewAllocaTy->isSingleValueType());
 
     // If we're just going to emit a memcpy, the alloca hasn't changed, and the
     // size hasn't been shrunk based on analysis of the viable range, this is
Index: test/Transforms/SROA/vector-promotion.ll
===================================================================
--- test/Transforms/SROA/vector-promotion.ll
+++ test/Transforms/SROA/vector-promotion.ll
@@ -468,3 +468,38 @@
 ; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
 ; CHECK: ret i32 %[[insert]]
 }
+
+%S7 = type { float, float, float }
+%U7 = type { <4 x float> }
+
+declare i32 @test7_helper(%S7*)
+
+define i32 @test7(%S7* %x) {
+; CHECK-LABEL: @test7(
+; Verify that SROA does not rewrite the 12-byte memcpy below into a 16-byte
+; vector store, which would accidentally put gibberish onto the stack.
+entry:
+  ; Create a temporary %tmp1 of type %S7 and memcpy the 12 bytes at %x into it
+  %tmp1 = alloca %S7, align 4
+  %0 = bitcast %S7* %tmp1 to i8*
+  %1 = bitcast %S7* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i32 4, i1 false)
+
+  ; Unused load of %tmp1 viewed as %U7 (<4 x float>); it does nothing, but appears to confuse SROA
+  %unused1 = bitcast %S7* %tmp1 to %U7*
+  %unused2 = getelementptr inbounds %U7* %unused1, i32 0, i32 0
+  %unused3 = load <4 x float>* %unused2, align 1
+
+  ; Create a second temporary %tmp2 and memcpy the 12 bytes of %tmp1 into it
+  %tmp2 = alloca %S7, align 4
+  %2 = bitcast %S7* %tmp2 to i8*
+  %3 = bitcast %S7* %tmp1 to i8*
+; CHECK: alloca
+; CHECK-NOT: store <4 x float>
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i32 4, i1 false)
+
+  %result = call i32 @test7_helper(%S7* %tmp2)
+  ret i32 %result
+; CHECK: ret i32 %result
+}
+