diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h --- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h +++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h @@ -21,6 +21,7 @@ class MemCpyInst; class MemMoveInst; class MemSetInst; +class ScalarEvolution; class TargetTransformInfo; class Value; struct Align; @@ -28,9 +29,9 @@ /// Emit a loop implementing the semantics of llvm.memcpy where the size is not /// a compile-time constant. Loop will be insterted at \p InsertBefore. void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, - Value *DstAddr, Value *CopyLen, - Align SrcAlign, Align DestAlign, - bool SrcIsVolatile, bool DstIsVolatile, + Value *DstAddr, Value *CopyLen, Align SrcAlign, + Align DestAlign, bool SrcIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI); /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a @@ -39,10 +40,11 @@ Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, - const TargetTransformInfo &TTI); + bool CanOverlap, const TargetTransformInfo &TTI); /// Expand \p MemCpy as a loop. \p MemCpy is not deleted. -void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI); +void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, + ScalarEvolution *SE = nullptr); /// Expand \p MemMove as a loop. \p MemMove is not deleted. void expandMemMoveAsLoop(MemMoveInst *MemMove); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp --- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -115,7 +115,8 @@ /* SrcAlign */ LI->getAlign(), /* DestAlign */ SI->getAlign(), /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile(), TTI); + /* DstIsVolatile */ SI->isVolatile(), + /* CanOverlap */ true, TTI); SI->eraseFromParent(); LI->eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp --- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -18,6 +20,7 @@ Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, + bool CanOverlap, const TargetTransformInfo &TTI) { // No need to expand zero length copies. if (CopyLen->isZero()) @@ -28,6 +31,10 @@ Function *ParentFunc = PreLoopBB->getParent(); LLVMContext &Ctx = PreLoopBB->getContext(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); @@ -68,11 +75,20 @@ // Loop Body Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + StoreInst *Store = LoopBuilder.CreateAlignedStore( + Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); @@ -111,8 +127,12 @@ : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); Value *SrcGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); - Value *Load = + LoadInst *Load = RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); // Cast destination to operand type and store. PointerType *DstPtrType = PointerType::get(OpTy, DstAS); @@ -121,7 +141,11 @@ : RBuilder.CreateBitCast(DstAddr, DstPtrType); Value *DstGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); - RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); BytesCopied += OperandSize; } @@ -134,7 +158,7 @@ Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, - bool DstIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI) { BasicBlock *PreLoopBB = InsertBefore->getParent(); BasicBlock *PostLoopBB = @@ -143,6 +167,11 @@ Function *ParentFunc = PreLoopBB->getParent(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); LLVMContext &Ctx = PreLoopBB->getContext(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); @@ -183,10 +212,18 @@ LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign, - SrcIsVolatile); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope)); + Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + StoreInst *Store = + LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); @@ -237,11 +274,20 @@ Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); Value *SrcGEP = ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); - Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign, - SrcIsVolatile); + LoadInst *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + Value *DstGEP = ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); - ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); Value *ResNewIndex = ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); @@ -420,7 +466,16 @@ } void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + ScalarEvolution *SE) { + bool CanOverlap = true; + if (SE) { + auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource()); + auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest()); + if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy)) + CanOverlap = false; + } + if (ConstantInt *CI = dyn_cast(Memcpy->getLength())) { createMemCpyLoopKnownSize( /* InsertBefore */ Memcpy, @@ -431,6 +486,7 @@ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, /* TargetTransformInfo */ TTI); } else { createMemCpyLoopUnknownSize( @@ -442,6 +498,7 @@ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, /* TargetTransformInfo */ TTI); } } diff --git a/llvm/unittests/Transforms/Utils/CMakeLists.txt b/llvm/unittests/Transforms/Utils/CMakeLists.txt --- a/llvm/unittests/Transforms/Utils/CMakeLists.txt +++ b/llvm/unittests/Transforms/Utils/CMakeLists.txt @@ -29,3 +29,5 @@ ValueMapperTest.cpp VFABIUtils.cpp ) + +target_link_libraries(UtilsTests PRIVATE LLVMTestingSupport) \ No newline at end of file diff --git a/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp b/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp --- a/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp +++ b/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp @@ -109,7 +109,8 @@ assert(isa(Inst) && "Expecting llvm.memcpy.p0i8.i64 instructon"); MemCpyInst *MemCpyI = cast(Inst); - expandMemCpyAsLoop(MemCpyI, TTI); + auto &SE = FAM.getResult(F); + expandMemCpyAsLoop(MemCpyI, TTI, &SE); return PreservedAnalyses::none(); })); FPM.addPass(LoopVectorizePass(LoopVectorizeOptions())); @@ -117,7 +118,7 @@ ->PreservedAnalyses { // F.dump(); auto *TargetBB = getBasicBlockByName(F, "vector.body"); - EXPECT_NONFATAL_FAILURE(EXPECT_NE(TargetBB, nullptr), ""); + EXPECT_NE(TargetBB, nullptr); return PreservedAnalyses::all(); })); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));