diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1018,16 +1018,19 @@
                     SDValue Size, Align Alignment, bool isVol,
                     bool AlwaysInline, bool isTailCall,
                     MachinePointerInfo DstPtrInfo,
-                    MachinePointerInfo SrcPtrInfo);
+                    MachinePointerInfo SrcPtrInfo,
+                    const AAMDNodes &AAInfo = AAMDNodes());
 
   SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                      SDValue Size, Align Alignment, bool isVol, bool isTailCall,
                      MachinePointerInfo DstPtrInfo,
-                     MachinePointerInfo SrcPtrInfo);
+                     MachinePointerInfo SrcPtrInfo,
+                     const AAMDNodes &AAInfo = AAMDNodes());
 
   SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                     SDValue Size, Align Alignment, bool isVol, bool isTailCall,
-                    MachinePointerInfo DstPtrInfo);
+                    MachinePointerInfo DstPtrInfo,
+                    const AAMDNodes &AAInfo = AAMDNodes());
 
   SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
                           unsigned DstAlign, SDValue Src, unsigned SrcAlign,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6327,7 +6327,8 @@
                                            uint64_t Size, Align Alignment,
                                            bool isVol, bool AlwaysInline,
                                            MachinePointerInfo DstPtrInfo,
-                                           MachinePointerInfo SrcPtrInfo) {
+                                           MachinePointerInfo SrcPtrInfo,
+                                           const AAMDNodes &AAInfo) {
   // Turn a memcpy of undef to nop.
   // FIXME: We need to honor volatile even is Src is undef.
   if (Src.isUndef())
@@ -6386,6 +6387,10 @@
     }
   }
 
+  // Prepare AAInfo for loads/stores after lowering this memcpy.
+  AAMDNodes NewAAInfo = AAInfo;
+  NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
   MachineMemOperand::Flags MMOFlags =
       isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
   SmallVector<SDValue, 16> OutLoadChains;
@@ -6428,7 +6433,7 @@
       Store = DAG.getStore(
           Chain, dl, Value,
           DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
-          DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+          DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
       OutChains.push_back(Store);
     }
   }
@@ -6452,13 +6457,13 @@
           ISD::EXTLOAD, dl, NVT, Chain,
           DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
           SrcPtrInfo.getWithOffset(SrcOff), VT,
-          commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags);
+          commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
       OutLoadChains.push_back(Value.getValue(1));
 
       Store = DAG.getTruncStore(
           Chain, dl, Value,
           DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
-          DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
+          DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
       OutStoreChains.push_back(Store);
     }
     SrcOff += VTSize;
@@ -6517,7 +6522,8 @@
                                             uint64_t Size, Align Alignment,
                                             bool isVol, bool AlwaysInline,
                                             MachinePointerInfo DstPtrInfo,
-                                            MachinePointerInfo SrcPtrInfo) {
+                                            MachinePointerInfo SrcPtrInfo,
+                                            const AAMDNodes &AAInfo) {
   // Turn a memmove of undef to nop.
   // FIXME: We need to honor volatile even is Src is undef.
   if (Src.isUndef())
@@ -6560,6 +6566,10 @@
     }
   }
 
+  // Prepare AAInfo for loads/stores after lowering this memmove.
+  AAMDNodes NewAAInfo = AAInfo;
+  NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
   MachineMemOperand::Flags MMOFlags =
       isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
   uint64_t SrcOff = 0, DstOff = 0;
@@ -6578,10 +6588,10 @@
     if (isDereferenceable)
       SrcMMOFlags |= MachineMemOperand::MODereferenceable;
 
-    Value =
-        DAG.getLoad(VT, dl, Chain,
-                    DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
-                    SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags);
+    Value = DAG.getLoad(
+        VT, dl, Chain,
+        DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+        SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
@@ -6593,10 +6603,10 @@
     unsigned VTSize = VT.getSizeInBits() / 8;
     SDValue Store;
 
-    Store =
-        DAG.getStore(Chain, dl, LoadValues[i],
-                     DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
-                     DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+    Store = DAG.getStore(
+        Chain, dl, LoadValues[i],
+        DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+        DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -6625,7 +6635,8 @@
 static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
                                SDValue Chain, SDValue Dst, SDValue Src,
                                uint64_t Size, Align Alignment, bool isVol,
-                               MachinePointerInfo DstPtrInfo) {
+                               MachinePointerInfo DstPtrInfo,
+                               const AAMDNodes &AAInfo) {
   // Turn a memset of undef to nop.
   // FIXME: We need to honor volatile even is Src is undef.
   if (Src.isUndef())
@@ -6672,6 +6683,10 @@
       LargestVT = MemOps[i];
   SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
 
+  // Prepare AAInfo for loads/stores after lowering this memset.
+  AAMDNodes NewAAInfo = AAInfo;
+  NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
     unsigned VTSize = VT.getSizeInBits() / 8;
@@ -6697,7 +6712,8 @@
         Chain, dl, Value,
         DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
         DstPtrInfo.getWithOffset(DstOff), Alignment,
-        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
+        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+        NewAAInfo);
     OutChains.push_back(Store);
     DstOff += VT.getSizeInBits() / 8;
     Size -= VTSize;
@@ -6720,7 +6736,8 @@
                                 SDValue Src, SDValue Size, Align Alignment,
                                 bool isVol, bool AlwaysInline, bool isTailCall,
                                 MachinePointerInfo DstPtrInfo,
-                                MachinePointerInfo SrcPtrInfo) {
+                                MachinePointerInfo SrcPtrInfo,
+                                const AAMDNodes &AAInfo) {
   // Check to see if we should lower the memcpy to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6731,7 +6748,7 @@
 
     SDValue Result = getMemcpyLoadsAndStores(
         *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
-        isVol, false, DstPtrInfo, SrcPtrInfo);
+        isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo);
     if (Result.getNode())
       return Result;
   }
@@ -6752,7 +6769,7 @@
     assert(ConstantSize && "AlwaysInline requires a constant size!");
     return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
                                    ConstantSize->getZExtValue(), Alignment,
-                                   isVol, true, DstPtrInfo, SrcPtrInfo);
+                                   isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo);
   }
 
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
@@ -6834,7 +6851,8 @@
                                  SDValue Src, SDValue Size, Align Alignment,
                                  bool isVol, bool isTailCall,
                                  MachinePointerInfo DstPtrInfo,
-                                 MachinePointerInfo SrcPtrInfo) {
+                                 MachinePointerInfo SrcPtrInfo,
+                                 const AAMDNodes &AAInfo) {
   // Check to see if we should lower the memmove to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6845,7 +6863,7 @@
 
     SDValue Result = getMemmoveLoadsAndStores(
         *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
-        isVol, false, DstPtrInfo, SrcPtrInfo);
+        isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo);
     if (Result.getNode())
       return Result;
   }
@@ -6935,7 +6953,8 @@
 SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                 SDValue Src, SDValue Size, Align Alignment,
                                 bool isVol, bool isTailCall,
-                                MachinePointerInfo DstPtrInfo) {
+                                MachinePointerInfo DstPtrInfo,
+                                const AAMDNodes &AAInfo) {
   // Check to see if we should lower the memset to stores first.
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6946,7 +6965,7 @@
 
     SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
                                      ConstantSize->getZExtValue(), Alignment,
-                                     isVol, DstPtrInfo);
+                                     isVol, DstPtrInfo, AAInfo);
     if (Result.getNode())
       return Result;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5835,10 +5835,12 @@
     // FIXME: Support passing different dest/src alignments to the memcpy DAG
     // node.
     SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
     SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
                                /* AlwaysInline */ false, isTC,
                                MachinePointerInfo(I.getArgOperand(0)),
-                               MachinePointerInfo(I.getArgOperand(1)));
+                               MachinePointerInfo(I.getArgOperand(1)), AAInfo);
     updateDAGForMaybeTailCall(MC);
     return;
   }
@@ -5856,10 +5858,12 @@
     bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
     // FIXME: Support passing different dest/src alignments to the memcpy DAG
     // node.
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
     SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
                                /* AlwaysInline */ true, isTC,
                                MachinePointerInfo(I.getArgOperand(0)),
-                               MachinePointerInfo(I.getArgOperand(1)));
+                               MachinePointerInfo(I.getArgOperand(1)), AAInfo);
     updateDAGForMaybeTailCall(MC);
     return;
   }
@@ -5873,8 +5877,10 @@
     bool isVol = MSI.isVolatile();
     bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
     SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
     SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
-                               MachinePointerInfo(I.getArgOperand(0)));
+                               MachinePointerInfo(I.getArgOperand(0)), AAInfo);
     updateDAGForMaybeTailCall(MS);
     return;
   }
@@ -5892,9 +5898,11 @@
     // FIXME: Support passing different dest/src alignments to the memmove DAG
     // node.
     SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
     SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
                                 isTC, MachinePointerInfo(I.getArgOperand(0)),
-                                MachinePointerInfo(I.getArgOperand(1)));
+                                MachinePointerInfo(I.getArgOperand(1)), AAInfo);
     updateDAGForMaybeTailCall(MM);
     return;
   }
@@ -7703,10 +7711,12 @@
     // because the return pointer needs to be adjusted by the size of
     // the copied memory.
     SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
     SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol,
                                false, /*isTailCall=*/false,
                                MachinePointerInfo(I.getArgOperand(0)),
-                               MachinePointerInfo(I.getArgOperand(1)));
+                               MachinePointerInfo(I.getArgOperand(1)), AAInfo);
     assert(MC.getNode() != nullptr &&
            "** memcpy should not be lowered as TailCall in mempcpy context **");
     DAG.setRoot(MC);
diff --git a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
--- a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
@@ -8,15 +8,16 @@
 ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
 
 ; MIR-LABEL: name: test_memcpy
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4)
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4)
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) {
 ; CHECK-LABEL: test_memcpy:
-; CHECK: ldr [[PVAL:q[0-9]+]], [x0, #16]
-; CHECK: str [[PVAL]], [x0]
-; CHECK: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
-; CHECK: add w0, [[Q0]], [[Q1]]
-; CHECK: ret
+; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
+; CHECK-DAG: ldr [[PVAL:q[0-9]+]], [x0, #16]
+; CHECK-DAG: add w8, [[Q0]], [[Q1]]
+; CHECK: str [[PVAL]], [x0]
+; CHECK: mov w0, w8
+; CHECK: ret
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
   %p1 = bitcast i32* %add.ptr to i8*
@@ -29,15 +30,16 @@
 }
 
 ; MIR-LABEL: name: test_memcpy_inline
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4)
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4)
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) {
 ; CHECK-LABEL: test_memcpy_inline:
-; CHECK: ldr [[PVAL:q[0-9]+]], [x0, #16]
-; CHECK: str [[PVAL]], [x0]
-; CHECK: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
-; CHECK: add w0, [[Q0]], [[Q1]]
-; CHECK: ret
+; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
+; CHECK-DAG: ldr [[PVAL:q[0-9]+]], [x0, #16]
+; CHECK-DAG: add w8, [[Q0]], [[Q1]]
+; CHECK: str [[PVAL]], [x0]
+; CHECK: mov w0, w8
+; CHECK: ret
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
   %p1 = bitcast i32* %add.ptr to i8*
@@ -50,15 +52,16 @@
 }
 
 ; MIR-LABEL: name: test_memmove
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4)
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4)
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memmove(i32* nocapture %p, i32* nocapture readonly %q) {
 ; CHECK-LABEL: test_memmove:
-; CHECK: ldr [[PVAL:q[0-9]+]], [x0, #16]
-; CHECK: str [[PVAL]], [x0]
-; CHECK: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
-; CHECK: add w0, [[Q0]], [[Q1]]
-; CHECK: ret
+; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
+; CHECK-DAG: ldr [[PVAL:q[0-9]+]], [x0, #16]
+; CHECK-DAG: add w8, [[Q0]], [[Q1]]
+; CHECK: str [[PVAL]], [x0]
+; CHECK: mov w0, w8
+; CHECK: ret
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
   %p1 = bitcast i32* %add.ptr to i8*
@@ -72,15 +75,16 @@
 
 ; MIR-LABEL: name: test_memset
 ; MIR: %2:gpr64 = MOVi64imm -6148914691236517206
-; MIR-NEXT: STRXui %2, %0, 1 :: (store 8 into %ir.p0 + 8, align 4)
-; MIR-NEXT: STRXui %2, %0, 0 :: (store 8 into %ir.p0, align 4)
+; MIR-NEXT: STRXui %2, %0, 1 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRXui %2, %0, 0 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) {
 ; CHECK-LABEL: test_memset:
-; CHECK: mov [[PVAL:x[0-9]+]], #-6148914691236517206
-; CHECK: stp [[PVAL]], [[PVAL]], [x0]
-; CHECK: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
-; CHECK: add w0, [[Q0]], [[Q1]]
-; CHECK: ret
+; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
+; CHECK-DAG: mov [[PVAL:x[0-9]+]], #-6148914691236517206
+; CHECK: stp [[PVAL]], [[PVAL]], [x0]
+; CHECK: add w8, [[Q0]], [[Q1]]
+; CHECK: mov w0, w8
+; CHECK: ret
   %p0 = bitcast i32* %p to i8*
   tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
   %v0 = load i32, i32* %q, align 4, !alias.scope !4, !noalias !2
@@ -91,15 +95,16 @@
 }
 
 ; MIR-LABEL: name: test_mempcpy
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 1)
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 1)
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) {
 ; CHECK-LABEL: test_mempcpy:
-; CHECK: ldr [[PVAL:q[0-9]+]], [x0, #16]
-; CHECK: str [[PVAL]], [x0]
-; CHECK: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
-; CHECK: add w0, [[Q0]], [[Q1]]
-; CHECK: ret
+; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1]
+; CHECK-DAG: ldr [[PVAL:q[0-9]+]], [x0, #16]
+; CHECK-DAG: add w8, [[Q0]], [[Q1]]
+; CHECK: str [[PVAL]], [x0]
+; CHECK: mov w0, w8
+; CHECK: ret
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
   %p1 = bitcast i32* %add.ptr to i8*
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
--- a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
@@ -12,16 +12,16 @@
 ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
 
 ; MIR-LABEL: name: test_memcpy
-; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, addrspace 1)
-; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, addrspace 1)
+; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
 define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
 ; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
 ; CHECK-LABEL: test_memcpy:
-; CHECK: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
-; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
-; CHECK: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
-; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
-; CHECK: s_setpc_b64 s[30:31]
+; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
+; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
+; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
+; CHECK: s_setpc_b64 s[30:31]
   %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
   %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
   %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
@@ -34,16 +34,16 @@
 }
 
 ; MIR-LABEL: name: test_memcpy_inline
-; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, addrspace 1)
-; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, addrspace 1)
+; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
 define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
 ; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
 ; CHECK-LABEL: test_memcpy_inline:
-; CHECK: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
-; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
-; CHECK: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
-; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
-; CHECK: s_setpc_b64 s[30:31]
+; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
+; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
+; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
+; CHECK: s_setpc_b64 s[30:31]
   %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
   %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
   %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
@@ -56,16 +56,16 @@
 }
 
 ; MIR-LABEL: name: test_memmove
-; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, addrspace 1)
-; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, addrspace 1)
+; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
 define i32 @test_memmove(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
 ; Check loads of %q are scheduled ahead of that store of the memmove on %p.
 ; CHECK-LABEL: test_memmove:
-; CHECK: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
-; CHECK: global_store_dwordx4 v[0:1], [[PVAL]]
-; CHECK: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
-; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
-; CHECK: s_setpc_b64 s[30:31]
+; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
+; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
+; CHECK: global_store_dwordx4 v[0:1], [[PVAL]]
+; CHECK: s_setpc_b64 s[30:31]
   %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
   %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4
   %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)*
@@ -78,15 +78,15 @@
 }
 
 ; MIR-LABEL: name: test_memset
-; MIR: GLOBAL_STORE_DWORDX4 killed %10, killed %11, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, addrspace 1)
+; MIR: GLOBAL_STORE_DWORDX4 killed %10, killed %11, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
 define i32 @test_memset(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
 ; Check loads of %q are scheduled ahead of that store of the memset on %p.
 ; CHECK-LABEL: test_memset:
-; CHECK: v_mov_b32_e32 v[[PVAL:[0-9]+]], 0xaaaaaaaa
-; CHECK: global_store_dwordx4 v[0:1], v{{\[}}[[PVAL]]{{:[0-9]+\]}}, off
-; CHECK: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
-; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
-; CHECK: s_setpc_b64 s[30:31]
+; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: v_mov_b32_e32 v[[PVAL:[0-9]+]], 0xaaaaaaaa
+; CHECK: global_store_dwordx4 v[0:1], v{{\[}}[[PVAL]]{{:[0-9]+\]}}, off
+; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
+; CHECK: s_setpc_b64 s[30:31]
   %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
   tail call void @llvm.memset.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
   %v0 = load i32, i32 addrspace(1)* %q, align 4, !alias.scope !4, !noalias !2
diff --git a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
--- a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
@@ -11,10 +11,10 @@
 ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
 
 ; MIR-LABEL: name: test_memcpy
-; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4)
-; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4)
+; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) {
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
@@ -28,10 +28,10 @@
 }
 
 ; MIR-LABEL: name: test_memcpy_inline
-; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4)
-; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4)
+; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) {
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
@@ -45,10 +45,10 @@
 }
 
 ; MIR-LABEL: name: test_memmove
-; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4)
-; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4)
+; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memmove(i32* nocapture %p, i32* nocapture readonly %q) {
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
@@ -63,8 +63,8 @@
 
 ; MIR-LABEL: name: test_memset
 ; MIR: %2:gr64 = MOV64ri -6148914691236517206
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, %2 :: (store 8 into %ir.p0 + 8, align 4)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.p0, align 4)
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, %2 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) {
   %p0 = bitcast i32* %p to i8*
   tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
@@ -76,10 +76,10 @@
 }
 
 ; MIR-LABEL: name: test_mempcpy
-; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1)
-; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 1)
-; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 1)
+; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) {
   %p0 = bitcast i32* %p to i8*
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
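
Note for reviewers: as a quick illustration of the pattern the tests above exercise, here is a minimal self-contained IR sketch. The function name, scope names, and metadata numbering below are illustrative and not taken from the test files; the tests themselves remain the authoritative check. With this patch, the loads and stores produced for the tagged memcpy keep its !alias.scope/!noalias sets (TBAA is intentionally dropped via NewAAInfo), which is what allows the load of %q to be scheduled past the expanded stores to %p.

define i32 @copy_then_load(i32* nocapture %p, i32* nocapture readonly %q) {
  %p0 = bitcast i32* %p to i8*
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
  %p1 = bitcast i32* %add.ptr to i8*
  ; Copy tagged with the %p scope; after lowering, each generated load/store
  ; MachineMemOperand now carries the same !alias.scope/!noalias sets.
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p0, i8* align 4 %p1, i64 16, i1 false), !alias.scope !2, !noalias !4
  ; Load in the disjoint %q scope; provably no-alias with the expanded stores.
  %v0 = load i32, i32* %q, align 4, !alias.scope !4, !noalias !2
  ret i32 %v0
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)

!0 = distinct !{!0, !"domain"} ; self-referential alias domain
!1 = distinct !{!1, !0, !"p"}  ; scope for accesses through %p
!2 = !{!1}                     ; scope list {p}
!3 = distinct !{!3, !0, !"q"}  ; scope for accesses through %q
!4 = !{!3}                     ; scope list {q}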