diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -52,6 +52,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
@@ -97,6 +98,10 @@
 
 #define DEBUG_TYPE "selectiondag"
 
+static cl::opt<bool> EnableMemCpyScopedNoAlias(
+    "enable-memcpy-scoped-noalias", cl::Hidden, cl::init(true),
+    cl::desc("Enable scoped no-alias support during memcpy lowering"));
+
 static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
        cl::Hidden, cl::init(true),
        cl::desc("Gang up loads and stores generated by inlining of memcpy"));
@@ -6378,6 +6383,35 @@
     }
   }
 
+  // Give the loads and stores emitted for this memcpy distinct alias scopes
+  // inside a fresh anonymous domain: loads are tagged with the "Src" scope
+  // and declare noalias against "Dst", stores the other way round. Scoped
+  // metadata already carried by AAInfo is preserved by concatenation.
+  // NOTE(review): LangRef allows llvm.memcpy operands to be exactly equal;
+  // confirm that marking the two sides noalias is acceptable in that case.
+  AAMDNodes DstAAInfo = AAInfo;
+  AAMDNodes SrcAAInfo = AAInfo;
+  if (EnableMemCpyScopedNoAlias) {
+    MDBuilder MDB(*DAG.getContext());
+    MDNode *Domain =
+        MDB.createAnonymousAliasScopeDomain("MemcpyLoweringDomain");
+    MDNode *DstScope = MDB.createAnonymousAliasScope(Domain, "Dst");
+    MDNode *SrcScope = MDB.createAnonymousAliasScope(Domain, "Src");
+    MDNode *DstAliasScope = MDNode::concatenate(
+        AAInfo.Scope, MDNode::get(*DAG.getContext(), {DstScope}));
+    MDNode *DstNoAlias = MDNode::concatenate(
+        AAInfo.NoAlias, MDNode::get(*DAG.getContext(), {SrcScope}));
+    MDNode *SrcAliasScope = MDNode::concatenate(
+        AAInfo.Scope, MDNode::get(*DAG.getContext(), {SrcScope}));
+    MDNode *SrcNoAlias = MDNode::concatenate(
+        AAInfo.NoAlias, MDNode::get(*DAG.getContext(), {DstScope}));
+
+    DstAAInfo.Scope = DstAliasScope;
+    DstAAInfo.NoAlias = DstNoAlias;
+    SrcAAInfo.Scope = SrcAliasScope;
+    SrcAAInfo.NoAlias = SrcNoAlias;
+  }
+
   MachineMemOperand::Flags MMOFlags =
       isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
   SmallVector<SDValue, 16> OutLoadChains;
@@ -6420,7 +6454,7 @@
         Store = DAG.getStore(
             Chain, dl, Value,
             DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
-            DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, AAInfo);
+            DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, DstAAInfo);
         OutChains.push_back(Store);
       }
     }
@@ -6444,13 +6478,13 @@
           ISD::EXTLOAD, dl, NVT, Chain,
           DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
           SrcPtrInfo.getWithOffset(SrcOff), VT,
-          commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, AAInfo);
+          commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, SrcAAInfo);
       OutLoadChains.push_back(Value.getValue(1));
 
       Store = DAG.getTruncStore(
           Chain, dl, Value,
           DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
-          DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, AAInfo);
+          DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, DstAAInfo);
       OutStoreChains.push_back(Store);
     }
     SrcOff += VTSize;
diff --git a/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll b/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
--- a/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
@@ -9,9 +9,9 @@
 ; CHECK: adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}}
 ; CHECK: add x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}}
 ; CHECK-NEXT: ldr [[VAL2:x[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: str [[VAL2]], [x0]
 ; CHECK-NEXT: ldr [[VAL:w[0-9]+]], [x[[ADDR]], #8]
 ; CHECK-NEXT: str [[VAL]], [x0, #8]
-; CHECK-NEXT: str [[VAL2]], [x0]
 
 define void @foo(i8* %a) {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
--- a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -27,10 +27,10 @@
 define void @t1(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t1:
-; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
-; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15]
-; CHECK: stur [[DEST:q[0-9]+]], [x0, #15]
-; CHECK: str [[DEST:q[0-9]+]], [x0]
+; CHECK: ldr [[REG0:q[0-9]+]], [x[[BASEREG:[0-9]+]]]
+; CHECK: str [[REG0]], [x0]
+; CHECK: ldur [[REG1:q[0-9]+]], [x[[BASEREG]], #15]
+; CHECK: stur [[REG1]], [x0, #15]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false)
   ret void
 }
@@ -50,10 +50,10 @@
 define void @t3(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t3:
-; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
-; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16]
-; CHECK: str [[REG4]], [x0, #16]
+; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]]]
 ; CHECK: str [[DEST]], [x0]
+; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG]], #16]
+; CHECK: str [[REG4]], [x0, #16]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
--- a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -26,14 +26,14 @@
 ; loads and stores if strict-alignment is turned on.
 define void @t2(i8* %out, i8* %in) {
 ; CHECK-LABEL: t2:
-; CHECK: ldrb w{{[0-9]+}}, [x1, #3]
-; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1, #2]
-; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1, #1]
-; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1]
-; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #3]
-; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #2]
-; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #1]
-; CHECK-NEXT: strb w{{[0-9]+}}, [x0]
+; CHECK: ldrb w[[V0:[0-9]+]], [x1]
+; CHECK-NEXT: ldrb w[[V1:[0-9]+]], [x1, #1]
+; CHECK-NEXT: ldrb w[[V2:[0-9]+]], [x1, #2]
+; CHECK-NEXT: ldrb w[[V3:[0-9]+]], [x1, #3]
+; CHECK-NEXT: strb w[[V0]], [x0]
+; CHECK-NEXT: strb w[[V1]], [x0, #1]
+; CHECK-NEXT: strb w[[V2]], [x0, #2]
+; CHECK-NEXT: strb w[[V3]], [x0, #3]
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 4, i1 false)
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/pr45301.ll b/llvm/test/CodeGen/PowerPC/pr45301.ll
--- a/llvm/test/CodeGen/PowerPC/pr45301.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45301.ll
@@ -14,19 +14,19 @@
 ; CHECK-NEXT: addis r4, r2, g@toc@ha
 ; CHECK-NEXT: addi r4, r4, g@toc@l
 ; CHECK-NEXT: ld r5, 0(r4)
-; CHECK-NEXT: std r5, 0(r3)
-; CHECK-NEXT: ld r5, 16(r4)
-; CHECK-NEXT: std r5, 16(r3)
-; CHECK-NEXT: ld r6, 8(r4)
-; CHECK-NEXT: std r6, 8(r3)
-; CHECK-NEXT: ld r6, 24(r4)
-; CHECK-NEXT: std r6, 24(r3)
-; CHECK-NEXT: lwz r6, 0(r3)
+; CHECK-NEXT: ld r6, 16(r4)
+; CHECK-NEXT: ld r7, 8(r4)
+; CHECK-NEXT: ld r8, 24(r4)
 ; CHECK-NEXT: ld r4, 32(r4)
+; CHECK-NEXT: std r5, 0(r3)
 ; CHECK-NEXT: std r4, 32(r3)
 ; CHECK-NEXT: li r4, 20
-; CHECK-NEXT: stwbrx r6, 0, r3
-; CHECK-NEXT: stwbrx r5, r3, r4
+; CHECK-NEXT: lwz r5, 0(r3)
+; CHECK-NEXT: std r7, 8(r3)
+; CHECK-NEXT: std r8, 24(r3)
+; CHECK-NEXT: std r6, 16(r3)
+; CHECK-NEXT: stwbrx r5, 0, r3
+; CHECK-NEXT: stwbrx r6, r3, r4
 ; CHECK-NEXT: addi r1, r1, 112
 ; CHECK-NEXT: ld r0, 16(r1)
 ; CHECK-NEXT: mtlr r0