Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1124,21 +1124,15 @@
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
   if (!Pair.second) {
-    if (CacheInfo->Size < Loc.Size) {
-      // The query's Size is greater than the cached one. Throw out the
-      // cached data and proceed with the query at the greater size.
+    if (CacheInfo->Size != Loc.Size) {
+      // The query's Size differs from the cached one. Throw out the
+      // cached data and proceed with the query at the new size.
       CacheInfo->Pair = BBSkipFirstBlockPair();
       CacheInfo->Size = Loc.Size;
       for (auto &Entry : CacheInfo->NonLocalDeps)
         if (Instruction *Inst = Entry.getResult().getInst())
           RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
       CacheInfo->NonLocalDeps.clear();
-    } else if (CacheInfo->Size > Loc.Size) {
-      // This query's Size is less than the cached one. Conservatively restart
-      // the query using the greater size.
-      return getNonLocalPointerDepFromBB(
-          QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
-          StartBB, Result, Visited, SkipFirstBlock);
     }
 
     // If the query's AATags are inconsistent with the cached one,
Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,22 +476,34 @@
       Alignment = DL.getABITypeAlignment(EltType);
     }
 
-    AMemSet =
-      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
-
     DEBUG(dbgs() << "Replace stores:\n";
           for (Instruction *SI : Range.TheStores)
-            dbgs() << *SI << '\n';
-          dbgs() << "With: " << *AMemSet << '\n');
-
-    if (!Range.TheStores.empty())
-      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+            dbgs() << *SI << '\n');
+
+    // Save the debug location now: the stores are erased before the memset is
+    // created, so they must not be dereferenced once the loop below has run.
+    DebugLoc StoreLoc;
+    if (!Range.TheStores.empty())
+      StoreLoc = Range.TheStores[0]->getDebugLoc();
 
     // Zap all the stores.
     for (Instruction *SI : Range.TheStores) {
       MD->removeInstruction(SI);
       SI->eraseFromParent();
     }
+
+    // Create the memset after removing the stores, so that if there are any
+    // cached non-local dependencies on the removed instructions, the cache
+    // entries are updated to "dirty" entries pointing below the memset, so
+    // subsequent queries include the memset.
+    AMemSet =
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
+    DEBUG(dbgs() << "With: " << *AMemSet << '\n');
+
+    if (!Range.TheStores.empty())
+      AMemSet->setDebugLoc(StoreLoc);
+
     ++NumMemSetInfer;
   }
 
Index: test/Transforms/MemCpyOpt/mixed-sizes.ll
===================================================================
--- test/Transforms/MemCpyOpt/mixed-sizes.ll
+++ test/Transforms/MemCpyOpt/mixed-sizes.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; Handle memcpy-memcpy dependencies of differing sizes correctly.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Don't delete the second memcpy, even though there's an earlier
+; memcpy with a larger size from the same address.
+
+; CHECK-LABEL: @foo
+define i32 @foo(i1 %z) {
+entry:
+  %a = alloca [10 x i32]
+  %s = alloca [10 x i32]
+  %0 = bitcast [10 x i32]* %a to i8*
+  %1 = bitcast [10 x i32]* %s to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false)
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0
+  store i32 1, i32* %arrayidx
+  %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1
+  %scevgep7 = bitcast i32* %scevgep to i8*
+  br i1 %z, label %for.body3.lr.ph, label %for.inc7.1
+
+for.body3.lr.ph: ; preds = %entry
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false)
+  br label %for.inc7.1
+
+for.inc7.1:
+; CHECK: for.inc7.1:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
+  %2 = load i32, i32* %arrayidx
+  ret i32 %2
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)