Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -800,14 +800,18 @@
                                  SplatValue, MSI, MSIs, Ev, BECount, NegStride,
                                  /*IsLoopMemset=*/true);
 }
-/// mayLoopAccessLocation - Return true if the specified loop might access the
-/// specified pointer location, which is a loop-strided access. The 'Access'
-/// argument specifies what the verboten forms of access are (read or write).
-static bool
-mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
-                      const SCEV *BECount, unsigned StoreSize,
-                      AliasAnalysis &AA,
-                      SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+enum class LoopAccessKind { IllegalAccess = 0, LegalConstrained, Legal };
+
+/// mayLoopAccessLocation - Return IllegalAccess if the specified loop might
+/// access \p Ptr, which is a loop-strided access, from an instruction other
+/// than \p II. If the only such access is from \p II, LegalConstrained is
+/// returned. If no instruction accesses the location, Legal is returned.
+/// \p Access specifies what the verboten forms of access are (read or
+/// write).
+static LoopAccessKind mayLoopAccessLocation(
+    Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount,
+    unsigned StoreSize, AliasAnalysis &AA,
+    SmallPtrSetImpl<Instruction *> &IgnoredStores, Instruction *II = nullptr) {
   // Get the location that may be stored across the loop.  Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
@@ -825,15 +829,20 @@
   // which will then no-alias a store to &A[100].
   MemoryLocation StoreLoc(Ptr, AccessSize);
 
+  LoopAccessKind LoopAccess = LoopAccessKind::Legal;
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
        ++BI)
     for (Instruction &I : **BI)
       if (IgnoredStores.count(&I) == 0 &&
           isModOrRefSet(
-              intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
-        return true;
+              intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) {
+        if (&I == II)
+          LoopAccess = LoopAccessKind::LegalConstrained;
+        else
+          return LoopAccessKind::IllegalAccess;
+      }
 
-  return false;
+  return LoopAccess;
 }
 
 // If we have a negative stride, Start refers to the end of the memory location
@@ -928,8 +937,9 @@
   // base pointer and checking the region.
   Value *BasePtr =
       Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
-  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
+  auto LoopAccess = mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop,
+                                          BECount, StoreSize, *AA, Stores);
+  if (LoopAccess == LoopAccessKind::IllegalAccess) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -1058,8 +1068,10 @@
   SmallPtrSet<Instruction *, 1> Stores;
   Stores.insert(SI);
 
-  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
+  LoopAccessKind StoreLoopAccess =
+      mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
+                            StoreSize, *AA, Stores, LI);
+  if (StoreLoopAccess == LoopAccessKind::IllegalAccess) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@@ -1078,8 +1090,13 @@
   Value *LoadBasePtr = Expander.expandCodeFor(
       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
 
-  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
+  LoopAccessKind LoadLoopAccess = mayLoopAccessLocation(
+      LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, StoreSize, *AA, Stores);
+  assert(LoadLoopAccess != LoopAccessKind::LegalConstrained &&
+         "The load can never have constrained access because we don't pass "
+         "any instruction that could constrain it rather than make it "
+         "illegal");
+  if (LoadLoopAccess == LoopAccessKind::IllegalAccess) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -1102,10 +1119,17 @@
   // Check whether to generate an unordered atomic memcpy:
   //  If the load or store are atomic, then they must necessarily be unordered
   //  by previous checks.
-  if (!SI->isAtomic() && !LI->isAtomic())
-    NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
-                                   LI->getAlign(), NumBytes);
-  else {
+  if (!SI->isAtomic() && !LI->isAtomic()) {
+    if (StoreLoopAccess == LoopAccessKind::Legal) {
+      NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
+                                     LI->getAlign(), NumBytes);
+    } else {
+      assert(StoreLoopAccess == LoopAccessKind::LegalConstrained &&
+             "We must have constrained access to insert a memmove");
+      NewCall = Builder.CreateMemMove(StoreBasePtr, SI->getAlign(),
+                                      LoadBasePtr, LI->getAlign(), NumBytes);
+    }
+  } else {
     // We cannot allow unaligned ops for unordered load/store, so reject
     // anything where the alignment isn't at least the element size.
     const MaybeAlign StoreAlign = SI->getAlign();
@@ -1115,7 +1139,7 @@
     if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
       return false;
 
-    // If the element.atomic memcpy is not lowered into explicit
+    // If the element.atomic memcpy / memmove is not lowered into explicit
     // loads/stores later, then it will be lowered into an element-size
     // specific lib call.  If the lib call doesn't exist for our store size, then
     // we shouldn't generate the memcpy.
@@ -1125,9 +1149,17 @@
     // Create the call.
     // Note that unordered atomic loads/stores are *required* by the spec to
     // have an alignment but non-atomic loads/stores may not.
-    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
-        StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes,
-        StoreSize);
+    if (StoreLoopAccess == LoopAccessKind::Legal) {
+      NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+          StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes,
+          StoreSize);
+    } else {
+      assert(StoreLoopAccess == LoopAccessKind::LegalConstrained &&
+             "We must have constrained access to insert a memmove");
+      NewCall = Builder.CreateElementUnorderedAtomicMemMove(
+          StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes,
+          StoreSize);
+    }
   }
   NewCall->setDebugLoc(SI->getDebugLoc());
 
Index: llvm/test/Transforms/LoopIdiom/basic-address-space.ll
===================================================================
--- llvm/test/Transforms/LoopIdiom/basic-address-space.ll
+++ llvm/test/Transforms/LoopIdiom/basic-address-space.ll
@@ -56,15 +56,15 @@
 }
 
 ; PR9815 - This is a partial overlap case that cannot be safely transformed
-; into a memcpy.
+; into a memcpy. It should be transformed into a memmove.
 
 @g_50 = addrspace(2) global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
 
 define i32 @test14() nounwind {
 ; CHECK-LABEL: @test14(
+; CHECK: memmove
 ; CHECK: for.body:
 ; CHECK: load i32
-; CHECK: store i32
 ; CHECK: br i1 %cmp
 
 entry:
Index: llvm/test/Transforms/LoopIdiom/basic.ll
===================================================================
--- llvm/test/Transforms/LoopIdiom/basic.ll
+++ llvm/test/Transforms/LoopIdiom/basic.ll
@@ -431,7 +431,7 @@
 
 ; PR9815 - This is a partial overlap case that cannot be safely transformed
-; into a memcpy.
+; into a memcpy. It should be transformed into a memmove.
 
 @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
 
 define i32 @test14() nounwind {
@@ -456,9 +456,9 @@
   %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
   ret i32 %tmp8
 ; CHECK-LABEL: @test14(
+; CHECK: memmove
 ; CHECK: for.body:
 ; CHECK: load i32
-; CHECK: store i32
 ; CHECK: br i1 %cmp
 }
 
@@ -477,9 +477,7 @@
   br label %while.body
 ; CHECK-NOT: memcpy
 ;
-; FIXME: When we regain the ability to form a memmove here, this test should be
-; reversed and turned into a positive assertion.
-; CHECK-NOT: memmove
+; CHECK: memmove
 
 while.body:
   %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
@@ -488,7 +486,6 @@
 ; CHECK: load
   %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
   store i32 %val, i32* %dst.ptr, align 4
-; CHECK: store
   %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
   %cmp = icmp eq i32* %next.ptr, %end.ptr
   br i1 %cmp, label %exit, label %while.body
Index: llvm/test/Transforms/LoopIdiom/pr28196.ll
===================================================================
--- llvm/test/Transforms/LoopIdiom/pr28196.ll
+++ llvm/test/Transforms/LoopIdiom/pr28196.ll
@@ -45,9 +45,9 @@
 
 ; CHECK-LABEL: define void @test1_no_null_opt(
 ; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call void @llvm.memmove
 ; CHECK: getelementptr
 ; CHECK: getelementptr
 ; CHECK: load
-; CHECK: store
 
 attributes #0 = { "null-pointer-is-valid"="true" }
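
For reference, the PR9815 pattern exercised by @test14 is a self-overlapping copy loop. Below is a minimal C++ sketch of the idiom and of the lowering this patch enables; the function names and the trip count of 6 are illustrative, chosen to mirror the 7-element @g_50 test global, and are not taken from the patch itself:

#include <cstring>

int g_50[7] = {0, 0, 0, 0, 1, 0, 0}; // mirrors the @g_50 test global

// The loop form of the idiom: each iteration reads g_50[i + 1] and writes
// g_50[i], so the source and destination ranges partially overlap. Lowering
// this to memcpy would be undefined behavior, which is why the pass
// previously left the loop alone.
void shift_down() {
  for (int i = 0; i < 6; ++i)
    g_50[i] = g_50[i + 1];
}

// What LoopIdiomRecognize can now emit instead: memmove copies as if through
// a temporary buffer, so the overlap is harmless and the loop's semantics
// are preserved.
void shift_down_lowered() {
  std::memmove(&g_50[0], &g_50[1], 6 * sizeof(int));
}

This is exactly the LegalConstrained case: the only in-loop access that aliases the store location is the load being forwarded, so a memmove rather than a memcpy is the safe replacement.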