Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1010,6 +1010,25 @@
   return true;
 }
 
+class ExpandedValuesCleaner {
+
+  SCEVExpander &Expander;
+  TargetLibraryInfo *TLI;
+  SmallVector<Value *, 4> ExpandedValues;
+
+public:
+  ExpandedValuesCleaner(SCEVExpander &Expander, TargetLibraryInfo *TLI)
+      : Expander(Expander), TLI(TLI) {}
+
+  void add(Value *V) { ExpandedValues.push_back(V); }
+
+  void clean() {
+    Expander.clear();
+    for (auto *V : ExpandedValues)
+      RecursivelyDeleteTriviallyDeadInstructions(V, TLI);
+  }
+};
+
 /// If the stored value is a strided load in the same loop with the same stride
 /// this may be transformable into a memcpy. This kicks in for stuff like
 ///   for (i) A[i] = B[i];
@@ -1040,6 +1059,8 @@
   IRBuilder<> Builder(Preheader->getTerminator());
   SCEVExpander Expander(*SE, *DL, "loop-idiom");
 
+  ExpandedValuesCleaner EVC{Expander, TLI};
+
   const SCEV *StrStart = StoreEv->getStart();
   unsigned StrAS = SI->getPointerAddressSpace();
   Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1056,14 +1077,13 @@
   // checking everything.
   Value *StoreBasePtr = Expander.expandCodeFor(
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+  EVC.add(StoreBasePtr);
 
   SmallPtrSet<Instruction *, 1> Stores;
   Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
-    Expander.clear();
-    // If we generated new code for the base pointer, clean up.
-    RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+    EVC.clean();
     return false;
   }
 
@@ -1078,18 +1098,18 @@
   // mutated by the loop.
   Value *LoadBasePtr = Expander.expandCodeFor(
       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
+  EVC.add(LoadBasePtr);
 
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
-    Expander.clear();
-    // If we generated new code for the base pointer, clean up.
-    RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
-    RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+    EVC.clean();
     return false;
   }
 
-  if (avoidLIRForMultiBlockLoop())
+  if (avoidLIRForMultiBlockLoop()) {
+    EVC.clean();
     return false;
+  }
 
   // Okay, everything is safe, we can transform this!
 
@@ -1098,6 +1118,7 @@
 
   Value *NumBytes =
       Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+  EVC.add(NumBytes);
 
   CallInst *NewCall = nullptr;
   // Check whether to generate an unordered atomic memcpy:
@@ -1111,15 +1132,19 @@
     // anything where the alignment isn't at least the element size.
     const Align StoreAlign = SI->getAlign();
     const Align LoadAlign = LI->getAlign();
-    if (StoreAlign < StoreSize || LoadAlign < StoreSize)
+    if (StoreAlign < StoreSize || LoadAlign < StoreSize) {
+      EVC.clean();
       return false;
+    }
 
     // If the element.atomic memcpy is not lowered into explicit
     // loads/stores later, then it will be lowered into an element-size
    // specific lib call. If the lib call doesn't exist for our store size, then
     // we shouldn't generate the memcpy.
-    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
+    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) {
+      EVC.clean();
       return false;
+    }
 
     // Create the call.
     // Note that unordered atomic loads/stores are *required* by the spec to
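
For readers less familiar with the pattern the patch introduces, here is a small standalone sketch (not part of the patch, and not LLVM code) of the same idea: record everything that was speculatively emitted, then undo all of it with a single clean() call on every bail-out path. The Module, Cleaner, and tryTransform names below are hypothetical stand-ins for the IR, the SCEVExpander-expanded values, and the transform; removal from a vector stands in for RecursivelyDeleteTriviallyDeadInstructions.

// Standalone sketch of the "record expanded values, clean them all on any
// bail-out path" pattern, modeled with std types only.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using Module = std::vector<std::string>; // stand-in for emitted IR

// Analogue of ExpandedValuesCleaner: remembers what was emitted so a single
// clean() can undo all of it on every early-return path.
class Cleaner {
  Module &M;
  std::vector<std::string> Emitted;

public:
  explicit Cleaner(Module &M) : M(M) {}

  void add(const std::string &V) { Emitted.push_back(V); }

  void clean() {
    for (const auto &V : Emitted)
      M.erase(std::remove(M.begin(), M.end(), V), M.end());
    Emitted.clear();
  }
};

bool tryTransform(Module &M, bool FailEarly, bool FailLate) {
  Cleaner EVC(M);

  M.push_back("StoreBasePtr"); // "expand" a value into the module...
  EVC.add("StoreBasePtr");     // ...and register it for possible cleanup

  if (FailEarly) { EVC.clean(); return false; }

  M.push_back("LoadBasePtr");
  EVC.add("LoadBasePtr");

  if (FailLate) { EVC.clean(); return false; } // removes both expanded values

  M.push_back("memcpy"); // success: expanded values stay, as in the patch
  return true;
}

int main() {
  Module M;
  tryTransform(M, /*FailEarly=*/false, /*FailLate=*/true);
  std::cout << M.size() << "\n"; // 0: both expanded values were cleaned up
  tryTransform(M, /*FailEarly=*/false, /*FailLate=*/false);
  std::cout << M.size() << "\n"; // 3: StoreBasePtr, LoadBasePtr, memcpy kept
}

A destructor-based (RAII) variant with a commit() flag would remove even the repeated clean() calls on the failure paths; the explicit form above simply mirrors the helper as written in the patch.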