Index: llvm/include/llvm/Transforms/Utils/Evaluator.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Evaluator.h +++ llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -138,6 +138,8 @@ SmallVectorImpl<Constant *> &Formals); Constant *ComputeLoadResult(Constant *P, Type *Ty); + Constant *ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset); /// As we compute SSA register values, we store their contents here. The back /// of the deque contains the current function and the stack contains the Index: llvm/lib/Transforms/Utils/Evaluator.cpp =================================================================== --- llvm/lib/Transforms/Utils/Evaluator.cpp +++ llvm/lib/Transforms/Utils/Evaluator.cpp @@ -217,10 +217,13 @@ P = cast<Constant>(P->stripAndAccumulateConstantOffsets( DL, Offset, /* AllowNonInbounds */ true)); Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); - auto *GV = dyn_cast<GlobalVariable>(P); - if (!GV) - return nullptr; + if (auto *GV = dyn_cast<GlobalVariable>(P)) + return ComputeLoadResult(GV, Ty, Offset); + return nullptr; +} +Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset) { auto It = MutatedMemory.find(GV); if (It != MutatedMemory.end()) return It->second.read(Ty, Offset, DL); @@ -436,16 +439,41 @@ << "intrinsic.\n"); return false; } + + auto *LenC = dyn_cast<ConstantInt>(getVal(MSI->getLength())); + if (!LenC) { + LLVM_DEBUG(dbgs() << "Memset with unknown length.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc( + DL.getIndexTypeSizeInBits(Ptr->getType())); + auto *GV = dyn_cast<GlobalVariable>(Ptr); + if (!GV) { + LLVM_DEBUG(dbgs() << "Memset with unknown base.\n"); + return false; + } + Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = - ComputeLoadResult(getVal(Ptr), 
MSI->getValue()->getType()); - if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { - // This memset is a no-op. - LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); - ++CurInst; - continue; + APInt Len = LenC->getValue(); + while (Len != 0) { + Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset); + if (DestVal != Val) { + LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset " + << Offset << " of " << *GV << ".\n"); + return false; + } + ++Offset; + --Len; } + + LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; } if (II->isLifetimeStartOrEnd()) { Index: llvm/test/Transforms/GlobalOpt/ctor-memset.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/ctor-memset.ll +++ llvm/test/Transforms/GlobalOpt/ctor-memset.ll @@ -11,7 +11,7 @@ ] ;. -; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [0 x { i32, ptr, ptr }] zeroinitializer +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor3, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null }] ; CHECK: @[[G0:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } zeroinitializer ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 0, i32 0, i32 1 } ; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 1, i32 0, i32 0 } @@ -47,6 +47,10 @@ @g3 = global { i32, i32 } { i32 0, i32 1 } define internal void @ctor3() { +; CHECK-LABEL: @ctor3( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false) ret void } @@ -55,11 +59,17 @@ @g4 = global { i32, i32 } { i32 0, i32 undef } define internal void @ctor4() { +; CHECK-LABEL: @ctor4( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; call 
void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false) ret void } ; memset including padding bytes +; FIXME: We still incorrectly optimize the memset away here, even though code +; might access the padding. @g5 = global { i16, i32 } { i16 0, i32 1 } define internal void @ctor5() { @@ -68,3 +78,6 @@ } declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +;.