Index: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -154,6 +154,12 @@
     return ModRefInfo::Mod;
   }
 
+  if (const MemSetInst *MI = dyn_cast<MemSetInst>(Inst)) {
+    Loc = MemoryLocation::getForDest(MI);
+    // Conservatively assume ModRef for volatile memset.
+    return MI->isVolatile() ? ModRefInfo::ModRef : ModRefInfo::Mod;
+  }
+
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::lifetime_start:
Index: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1144,6 +1144,21 @@
   return true;
 }
 
+/// Determine whether the instruction has undefined content for the given Size,
+/// either because it was freshly alloca'd or started its lifetime.
+static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+  if (isa<AllocaInst>(I))
+    return true;
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+      if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+        if (LTSize->getZExtValue() >= Size->getZExtValue())
+          return true;
+
+  return false;
+}
+
 /// Transform memcpy to memset when its source was just memset.
 /// In other words, turn:
 /// \code
@@ -1167,12 +1182,23 @@
   if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
     return false;
 
-  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+  // A known memset size is required.
   ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+  if (!MemSetSize)
+    return false;
+
   // Make sure the memcpy doesn't read any more than what the memset wrote.
   // Don't worry about sizes larger than i64.
-  if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
-    return false;
+  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
+    // If the memcpy is larger than the memset, but the memory was undef prior
+    // to the memset, we can just ignore the tail.
+    MemDepResult DepInfo = MD->getDependency(MemSet);
+    if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+      CopySize = MemSetSize;
+    else
+      return false;
+  }
 
   IRBuilder<> Builder(MemCpy);
   Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
@@ -1252,19 +1278,7 @@
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep);
   } else if (SrcDepInfo.isDef()) {
-    Instruction *I = SrcDepInfo.getInst();
-    bool hasUndefContents = false;
-
-    if (isa<AllocaInst>(I)) {
-      hasUndefContents = true;
-    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
-        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
-          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
-            hasUndefContents = true;
-    }
-
-    if (hasUndefContents) {
+    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
       MD->removeInstruction(M);
       M->eraseFromParent();
       ++NumMemCpyInstr;
Index: llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
===================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
+++ llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: ret void
 ;
   %a = alloca %T, align 8
@@ -28,7 +28,7 @@
 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
 ; CHECK-NEXT: ret void
 ;
@@ -46,7 +46,7 @@
 ; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
 ; CHECK-NEXT: call void @free(i8* [[A]])
 ; CHECK-NEXT: ret void
@@ -98,7 +98,7 @@
 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: ret void
 ;
   %a = alloca %T, align 8