Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -34,6 +34,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -4893,6 +4894,30 @@
   SmallVector<SDValue, 8> OutChains;
   unsigned NumMemOps = MemOps.size();
   uint64_t SrcOff = 0, DstOff = 0;
+
+  // Give the loads and the stores of this expansion their own alias scopes in
+  // a fresh domain, each marked noalias with respect to the other, presumably
+  // so that later passes may cluster them (see memcpy-cluster.ll).
+  // NOTE(review): this relies on the source and destination ranges not
+  // partially overlapping; confirm against the memcpy contract.
+  LLVMContext &Ctx = *DAG.getContext();
+  MDBuilder MDB(Ctx);
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain();
+  MDNode *LoadScope = MDB.createAnonymousAliasScope(NewDomain);
+  MDNode *StoreScope = MDB.createAnonymousAliasScope(NewDomain);
+
+  // AAMDNodes::Scope and AAMDNodes::NoAlias hold *lists* of scopes (the same
+  // shape as !alias.scope / !noalias), so wrap each scope in a one-element
+  // list node instead of attaching the scope node itself.
+  MDNode *LoadScopeList = MDNode::get(Ctx, LoadScope);
+  MDNode *StoreScopeList = MDNode::get(Ctx, StoreScope);
+
+  AAMDNodes AAInfoLoad, AAInfoStore;
+  AAInfoLoad.Scope = LoadScopeList;
+  AAInfoLoad.NoAlias = StoreScopeList;
+  AAInfoStore.Scope = StoreScopeList;
+  AAInfoStore.NoAlias = LoadScopeList;
+
   for (unsigned i = 0; i != NumMemOps; ++i) {
     EVT VT = MemOps[i];
     unsigned VTSize = VT.getSizeInBits() / 8;
@@ -4928,14 +4953,14 @@
       // FIXME does the case above also need this?
       EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
       assert(NVT.bitsGE(VT));
       Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              DAG.getMemBasePlusOffset(Src, SrcOff, dl),
                              SrcPtrInfo.getWithOffset(SrcOff), VT,
-                             MinAlign(SrcAlign, SrcOff), MMOFlags);
+                             MinAlign(SrcAlign, SrcOff), MMOFlags, AAInfoLoad);
       OutChains.push_back(Value.getValue(1));
       Store = DAG.getTruncStore(
           Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
-          DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+          DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags, AAInfoStore);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
Index: test/CodeGen/PowerPC/memcpy-cluster.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/memcpy-cluster.ll
@@ -0,0 +1,50 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=-vsx -ppc-asm-full-reg-names < %s | FileCheck %s -check-prefix=PWR8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s -check-prefix=PWR9
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Check that the loads and the stores of an expanded memcpy are each emitted
+; back to back, and that every store writes the register that was loaded from
+; the matching source offset.
+
+; Function Attrs: nounwind
+define void @foo1(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  %1 = bitcast double* %y to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 8, i1 false)
+  ret void
+; PWR8-LABEL: @foo1
+; PWR8:      ld [[REG1:r[0-9]+]], 24([[BASE1:r[0-9]+]])
+; PWR8-NEXT: ld [[REG2:r[0-9]+]], 0([[BASE1]])
+; PWR8-NEXT: ld [[REG3:r[0-9]+]], 8([[BASE1]])
+; PWR8-NEXT: ld [[REG4:r[0-9]+]], 16([[BASE1]])
+; PWR8-NEXT: std [[REG1]], 24([[BASE2:r[0-9]+]])
+; PWR8-NEXT: std [[REG4]], 16([[BASE2]])
+; PWR8-NEXT: std [[REG3]], 8([[BASE2]])
+; PWR8-NEXT: std [[REG2]], 0([[BASE2]])
+; PWR8-NEXT: blr
+
+}
+
+; Function Attrs: nounwind
+define void @foo2(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  %1 = bitcast double* %y to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 8, i1 false)
+  ret void
+; PWR9-LABEL: @foo2
+; PWR9:      li [[OFFSET:r[0-9]+]], 16
+; PWR9-NEXT: lxvx [[REG1:vs[0-9]+]], 0, [[BASE1:r[0-9]+]]
+; PWR9-NEXT: lxvx [[REG2:vs[0-9]+]], [[BASE1]], [[OFFSET]]
+; PWR9-NEXT: stxvx [[REG2]], [[BASE2:r[0-9]+]], [[OFFSET]]
+; PWR9-NEXT: stxvx [[REG1]], 0, [[BASE2]]
+; PWR9-NEXT: blr
+
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+
+attributes #0 = { nounwind }