diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -912,6 +912,7 @@
   Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
   Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
 
+  bool Changed = false;
   const SCEV *Start = Ev->getStart();
   // Handle negative strided loops.
   if (NegStride)
@@ -920,7 +921,7 @@
   // TODO: ideally we should still be able to generate memset if SCEV expander
   // is taught to generate the dependencies at the latest point.
   if (!isSafeToExpand(Start, *SE))
-    return false;
+    return Changed;
 
   // Okay, we have a strided store "p[i]" of a splattable value. We can turn
   // this into a memset in the loop preheader now if we want. However, this
@@ -929,16 +930,26 @@
   // base pointer and checking the region.
   Value *BasePtr =
       Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
+
+  // From here on out, conservatively report to the pass manager that we've
+  // changed the IR, even if we later clean up these added instructions. There
+  // may be structural differences e.g. in the order of use lists not accounted
+  // for in just a textual dump of the IR. This is written as a variable, even
+  // though statically all the places this dominates could be replaced with
+  // 'true', with the hope that anyone trying to be clever / "more precise" with
+  // the return value will read this comment, and leave them alone.
+  Changed = true;
+
   if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
-    return false;
+    return Changed;
   }
 
   if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
-    return false;
+    return Changed;
 
   // Okay, everything looks good, insert the memset.
 
@@ -948,7 +959,7 @@
   // TODO: ideally we should still be able to generate memset if SCEV expander
   // is taught to generate the dependencies at the latest point.
   if (!isSafeToExpand(NumBytesS, *SE))
-    return false;
+    return Changed;
 
   Value *NumBytes =
       Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1067,6 +1078,7 @@
 
   ExpandedValuesCleaner EVC(Expander, TLI);
 
+  bool Changed = false;
   const SCEV *StrStart = StoreEv->getStart();
   unsigned StrAS = SI->getPointerAddressSpace();
   Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1085,11 +1097,20 @@
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
   EVC.add(StoreBasePtr);
 
+  // From here on out, conservatively report to the pass manager that we've
+  // changed the IR, even if we later clean up these added instructions. There
+  // may be structural differences e.g. in the order of use lists not accounted
+  // for in just a textual dump of the IR. This is written as a variable, even
+  // though statically all the places this dominates could be replaced with
+  // 'true', with the hope that anyone trying to be clever / "more precise" with
+  // the return value will read this comment, and leave them alone.
+  Changed = true;
+
   SmallPtrSet<Instruction *, 1> Stores;
   Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores))
-    return false;
+    return Changed;
 
   const SCEV *LdStart = LoadEv->getStart();
   unsigned LdAS = LI->getPointerAddressSpace();
@@ -1106,10 +1127,10 @@
 
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSize, *AA, Stores))
-    return false;
+    return Changed;
 
   if (avoidLIRForMultiBlockLoop())
-    return false;
+    return Changed;
 
   // Okay, everything is safe, we can transform this!
 
@@ -1133,14 +1154,14 @@
     const Align StoreAlign = SI->getAlign();
     const Align LoadAlign = LI->getAlign();
    if (StoreAlign < StoreSize || LoadAlign < StoreSize)
-      return false;
+      return Changed;
 
     // If the element.atomic memcpy is not lowered into explicit
     // loads/stores later, then it will be lowered into an element-size
     // specific lib call. If the lib call doesn't exist for our store size, then
     // we shouldn't generate the memcpy.
     if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
-      return false;
+      return Changed;
 
     // Create the call.
     // Note that unordered atomic loads/stores are *required* by the spec to
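
Note on the rationale encoded in the added comments: once SCEVExpander has materialized instructions in the preheader, deleting them again on a bail-out path does not restore every observable property of the module (use-list order being the example the comment gives), so every exit dominated by the expansion must report a change to the pass manager. The standalone sketch below models that contract under stated assumptions; FakeIR, runTransform, and the other names are invented for illustration and are not LLVM APIs.

// Standalone sketch, not LLVM code: models the "report Changed once anything
// was materialized" rule that this patch applies to LoopIdiomRecognize.
#include <cassert>
#include <vector>

struct FakeIR {
  std::vector<int> Insts;   // stand-in for the instruction list
  unsigned Generation = 0;  // stand-in for state that cached analyses observe
  void insert(int V) { Insts.push_back(V); ++Generation; }
  void eraseLast() { Insts.pop_back(); ++Generation; }
};

// Returns true if the IR may have been modified. A pass manager uses this to
// decide whether previously computed analyses are still trustworthy.
bool runTransform(FakeIR &IR, bool Profitable) {
  bool Changed = false;

  // Speculatively emit a helper value (the analog of SCEVExpander emitting the
  // base pointer in the preheader).
  IR.insert(42);
  // From here on, every exit path must report a change, even if we clean up.
  Changed = true;

  if (!Profitable) {
    IR.eraseLast();  // roll back the speculative instruction ...
    return Changed;  // ... but still tell the caller the IR was touched
  }

  // (the real transform would now emit the memset/memcpy)
  return Changed;
}

int main() {
  FakeIR IR;
  unsigned Before = IR.Generation;
  bool Changed = runTransform(IR, /*Profitable=*/false);
  // The instruction list looks unchanged, but observable state differs, so the
  // transform is obliged to have reported Changed == true.
  assert(IR.Insts.empty() && IR.Generation != Before && Changed);
  return 0;
}

Keeping Changed as a named variable, rather than folding each return to a literal true, mirrors the patch's stated intent that later edits to these bail-out paths keep reporting the expansion conservatively.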