Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -2698,7 +2698,7 @@ assert(!IsSplit); assert(NewBeginOffset == BeginOffset); II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); - II.setAlignment(getSliceAlign()); + II.setDestAlignment(getSliceAlign()); deleteIfTriviallyDead(OldPtr); return false; @@ -2819,13 +2819,13 @@ // update both source and dest of a single call. if (!IsSplittable) { Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); - if (IsDest) + if (IsDest) { II.setDest(AdjustedPtr); - else + II.setDestAlignment(SliceAlign); + } + else { II.setSource(AdjustedPtr); - - if (II.getAlignment() > SliceAlign) { - II.setAlignment(MinAlign(II.getAlignment(), SliceAlign)); + II.setSourceAlignment(SliceAlign); } DEBUG(dbgs() << " to: " << II << "\n"); @@ -2878,8 +2878,10 @@ // Compute the relative offset for the other pointer within the transfer. unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS); APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset); - unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1, - OtherOffset.zextOrTrunc(64).getZExtValue()); + unsigned OtherAlign = + IsDest ? II.getSourceAlignment() : II.getDestAlignment(); + OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1, + OtherOffset.zextOrTrunc(64).getZExtValue()); if (EmitMemCpy) { // Compute the other pointer, folding as much as possible to produce @@ -2891,9 +2893,22 @@ Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); - CallInst *New = IRB.CreateMemCpy( - IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size, - MinAlign(SliceAlign, OtherAlign), II.isVolatile()); + Value *DestPtr, *SrcPtr; + unsigned DestAlign, SrcAlign; + // Note: IsDest is true iff we're copying into the new alloca slice + if (IsDest) { + DestPtr = OurPtr; + DestAlign = SliceAlign; + SrcPtr = OtherPtr; + SrcAlign = OtherAlign; + } else { + DestPtr = OtherPtr; + DestAlign = OtherAlign; + SrcPtr = OurPtr; + SrcAlign = SliceAlign; + } + CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, + Size, II.isVolatile()); if (AATags) New->setAAMetadata(AATags); DEBUG(dbgs() << " to: " << *New << "\n"); Index: test/Transforms/SROA/basictest.ll =================================================================== --- test/Transforms/SROA/basictest.ll +++ test/Transforms/SROA/basictest.ll @@ -65,7 +65,7 @@ ret i64 %Z } -define void @test3(i8* %dst, i8* %src) { +define void @test3(i8* %dst, i8* align 8 %src) { ; CHECK-LABEL: @test3( entry: @@ -80,9 +80,9 @@ ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8] %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i1 false), !tbaa !0 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* align 8 %src, i32 300, i1 false), !tbaa !0 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %src, i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43 @@ -90,18 +90,18 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_0:!.*]] @@ -778,7 +778,7 @@ ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32* ; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]], {{.*}}, !tbaa [[TAG_0]] ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size, {{.*}}), !tbaa [[TAG_3]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[agep1]], i8* %src, i32 %size, {{.*}}), !tbaa [[TAG_3]] ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size, {{.*}}), !tbaa [[TAG_5]] ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32* @@ -787,7 +787,7 @@ ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32* ; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}}, !tbaa [[TAG_9]] ; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]] ; CHECK-NEXT: ret void entry: