InstCombine: track destination and source alignment of memcpy/memmove
separately in SimplifyMemTransfer instead of a single shared alignment, and
update the regression tests whose expected source alignment changes.

Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -184,14 +184,18 @@
 }
 
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
-  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
-  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
-  unsigned MinAlign = std::min(DstAlign, SrcAlign);
-  unsigned CopyAlign = MI->getAlignment();
-
-  // FIXME: Check & simplify source & dest alignments separately
-  if (CopyAlign < MinAlign) {
-    MI->setAlignment(MinAlign);
+  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
+  unsigned CopyDstAlign = MI->getDestAlignment();
+  if (CopyDstAlign < DstAlign) {
+    MI->setDestAlignment(DstAlign);
+    return MI;
+  }
+
+  auto *MTI = cast<MemTransferInst>(MI);
+  unsigned SrcAlign = getKnownAlignment(MTI->getRawSource(), DL, MI, &AC, &DT);
+  unsigned CopySrcAlign = MTI->getSourceAlignment();
+  if (CopySrcAlign < SrcAlign) {
+    MTI->setSourceAlignment(SrcAlign);
     return MI;
   }
 
@@ -235,15 +239,11 @@
       CopyMD = cast<MDNode>(M->getOperand(2));
   }
 
-  // If the memcpy/memmove provides better alignment info than we can
-  // infer, use it.
-  SrcAlign = std::max(SrcAlign, CopyAlign);
-  DstAlign = std::max(DstAlign, CopyAlign);
-
   Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
   Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
   LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile());
-  L->setAlignment(SrcAlign);
+  // Alignment from the mem intrinsic will be better, so use it.
+  L->setAlignment(CopySrcAlign);
   if (CopyMD)
     L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
   MDNode *LoopMemParallelMD =
@@ -252,7 +252,8 @@
     L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
 
   StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile());
-  S->setAlignment(DstAlign);
+  // Alignment from the mem intrinsic will be better, so use it.
+  S->setAlignment(CopyDstAlign);
   if (CopyMD)
     S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
   if (LoopMemParallelMD)
Index: test/Transforms/InstCombine/memcpy-from-global.ll
===================================================================
--- test/Transforms/InstCombine/memcpy-from-global.ll
+++ test/Transforms/InstCombine/memcpy-from-global.ll
@@ -60,7 +60,7 @@
 ; CHECK-NEXT: getelementptr inbounds [124 x i8], [124 x i8]*
 
 ; use @G instead of %A
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %{{.*}}, i8* align 8 getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %{{.*}}, i8* align 16 getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %b, i8* align 4 %a, i64 124, i1 false)
   call void @bar(i8* %b)
Index: test/Transforms/InstCombine/memmove.ll
===================================================================
--- test/Transforms/InstCombine/memmove.ll
+++ test/Transforms/InstCombine/memmove.ll
@@ -17,7 +17,7 @@
 define void @test2(i8* %A, i32 %N) {
   ;; dest can't alias source since we can't write to source!
   ;; CHECK-LABEL: test2
-  ;; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %A, i8* align 1 getelementptr inbounds ([33 x i8], [33 x i8]* @S, i{{32|64}} 0, i{{32|64}} 0), i32 %N, i1 false)
+  ;; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %A, i8* align 16 getelementptr inbounds ([33 x i8], [33 x i8]* @S, i{{32|64}} 0, i{{32|64}} 0), i32 %N, i1 false)
   ;; CHECK-NEXT: ret void
   call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i1 false)
   ret void