diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h --- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h +++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h @@ -21,6 +21,7 @@ class MemCpyInst; class MemMoveInst; class MemSetInst; +class ScalarEvolution; class TargetTransformInfo; class Value; struct Align; @@ -28,9 +29,9 @@ /// Emit a loop implementing the semantics of llvm.memcpy where the size is not /// a compile-time constant. Loop will be insterted at \p InsertBefore. void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, - Value *DstAddr, Value *CopyLen, - Align SrcAlign, Align DestAlign, - bool SrcIsVolatile, bool DstIsVolatile, + Value *DstAddr, Value *CopyLen, Align SrcAlign, + Align DestAlign, bool SrcIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI); /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a @@ -39,10 +40,11 @@ Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, - const TargetTransformInfo &TTI); + bool CanOverlap, const TargetTransformInfo &TTI); /// Expand \p MemCpy as a loop. \p MemCpy is not deleted. -void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI); +void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, + ScalarEvolution *SE = nullptr); /// Expand \p MemMove as a loop. \p MemMove is not deleted. 
void expandMemMoveAsLoop(MemMoveInst *MemMove); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp --- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -115,7 +115,8 @@ /* SrcAlign */ LI->getAlign(), /* DestAlign */ SI->getAlign(), /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile(), TTI); + /* DstIsVolatile */ SI->isVolatile(), + /* CanOverlap */ true, TTI); SI->eraseFromParent(); LI->eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp --- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -18,6 +20,7 @@ Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, + bool CanOverlap, const TargetTransformInfo &TTI) { // No need to expand zero length copies. 
if (CopyLen->isZero()) @@ -28,6 +31,10 @@ Function *ParentFunc = PreLoopBB->getParent(); LLVMContext &Ctx = PreLoopBB->getContext(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); @@ -68,12 +75,21 @@ // Loop Body Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, - PartSrcAlign, SrcIsVolatile); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = LoopBuilder.CreateAlignedStore( + Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); @@ -111,9 +127,13 @@ : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); Value *SrcGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); - Value *Load = + LoadInst *Load = RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile); - + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } // Cast destination to operand type and store. 
PointerType *DstPtrType = PointerType::get(OpTy, DstAS); Value *CastedDst = DstAddr->getType() == DstPtrType @@ -121,8 +141,12 @@ : RBuilder.CreateBitCast(DstAddr, DstPtrType); Value *DstGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); - RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } BytesCopied += OperandSize; } } @@ -134,7 +158,7 @@ Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, - bool DstIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI) { BasicBlock *PreLoopBB = InsertBefore->getParent(); BasicBlock *PostLoopBB = @@ -143,6 +167,11 @@ Function *ParentFunc = PreLoopBB->getParent(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); LLVMContext &Ctx = PreLoopBB->getContext(); + MDBuilder MDB(Ctx); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain"); + StringRef Name = "MemCopyAliasScope"; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); @@ -183,11 +212,19 @@ LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign, - SrcIsVolatile); + LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. 
+ Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope)); + } Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = + LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. + Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); @@ -237,12 +274,21 @@ Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); Value *SrcGEP = ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); - Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign, - SrcIsVolatile); + LoadInst *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, + PartSrcAlign, SrcIsVolatile); + if (!CanOverlap) { + // Set alias scope for loads. + Load->setMetadata(LLVMContext::MD_alias_scope, + MDNode::get(Ctx, NewScope)); + } Value *DstGEP = ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); - ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); - + StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, + DstIsVolatile); + if (!CanOverlap) { + // Indicate that stores don't overlap loads. 
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope)); + } Value *ResNewIndex = ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); @@ -426,7 +472,16 @@ } void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + ScalarEvolution *SE) { + bool CanOverlap = true; + if (SE) { + auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource()); + auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest()); + if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy)) + CanOverlap = false; + } + if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { createMemCpyLoopKnownSize( /* InsertBefore */ Memcpy, @@ -437,6 +492,7 @@ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, /* TargetTransformInfo */ TTI); } else { createMemCpyLoopUnknownSize( @@ -448,6 +504,7 @@ /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), + /* CanOverlap */ CanOverlap, /* TargetTransformInfo */ TTI); } } diff --git a/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp b/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp --- a/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp +++ b/llvm/unittests/Transforms/Utils/MemTransferLowering.cpp @@ -119,19 +119,15 @@ auto *MemCpyBB = getBasicBlockByName(F, "memcpy"); Instruction *Inst = &MemCpyBB->front(); MemCpyInst *MemCpyI = cast<MemCpyInst>(Inst); - expandMemCpyAsLoop(MemCpyI, TTI); + auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); + expandMemCpyAsLoop(MemCpyI, TTI, &SE); auto *CopyLoopBB = getBasicBlockByName(F, "load-store-loop"); Instruction *LoadInst = getInstructionByOpcode(*CopyLoopBB, Instruction::Load, 1); - EXPECT_NONFATAL_FAILURE( - EXPECT_NE(LoadInst->getMetadata(LLVMContext::MD_alias_scope), - nullptr), - ""); + 
EXPECT_NE(nullptr, LoadInst->getMetadata(LLVMContext::MD_alias_scope)); Instruction *StoreInst = getInstructionByOpcode(*CopyLoopBB, Instruction::Store, 1); - EXPECT_NONFATAL_FAILURE( - EXPECT_NE(StoreInst->getMetadata(LLVMContext::MD_noalias), nullptr), - ""); + EXPECT_NE(nullptr, StoreInst->getMetadata(LLVMContext::MD_noalias)); return PreservedAnalyses::none(); })); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); @@ -163,14 +159,15 @@ auto *MemCpyBB = getBasicBlockByName(F, "memcpy"); Instruction *Inst = &MemCpyBB->front(); MemCpyInst *MemCpyI = cast<MemCpyInst>(Inst); - expandMemCpyAsLoop(MemCpyI, TTI); + auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); + expandMemCpyAsLoop(MemCpyI, TTI, &SE); return PreservedAnalyses::none(); })); FPM.addPass(LoopVectorizePass(LoopVectorizeOptions())); FPM.addPass(ForwardingPass( [=](Function &F, FunctionAnalysisManager &FAM) -> PreservedAnalyses { auto *TargetBB = getBasicBlockByName(F, "vector.body"); - EXPECT_NONFATAL_FAILURE(EXPECT_NE(TargetBB, nullptr), ""); + EXPECT_NE(nullptr, TargetBB); return PreservedAnalyses::all(); })); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));