diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h --- a/llvm/include/llvm/Transforms/Utils/Evaluator.h +++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -138,6 +138,8 @@ SmallVectorImpl &Formals); Constant *ComputeLoadResult(Constant *P, Type *Ty); + Constant *ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset); /// As we compute SSA register values, we store their contents here. The back /// of the deque contains the current function and the stack contains the diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -217,10 +217,13 @@ P = cast(P->stripAndAccumulateConstantOffsets( DL, Offset, /* AllowNonInbounds */ true)); Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); - auto *GV = dyn_cast(P); - if (!GV) - return nullptr; + if (auto *GV = dyn_cast(P)) + return ComputeLoadResult(GV, Ty, Offset); + return nullptr; +} +Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset) { auto It = MutatedMemory.find(GV); if (It != MutatedMemory.end()) return It->second.read(Ty, Offset, DL); @@ -436,16 +439,39 @@ << "intrinsic.\n"); return false; } + + auto *LenC = dyn_cast(getVal(MSI->getLength())); + if (!LenC) { + LLVM_DEBUG(dbgs() << "Memset with unknown length.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + auto *GV = dyn_cast(Ptr); + if (!GV) { + LLVM_DEBUG(dbgs() << "Memset with unknown base.\n"); + return false; + } + Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = - ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType()); - if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { - // This memset is a no-op. - LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); - ++CurInst; - continue; + APInt Len = LenC->getValue(); + while (Len != 0) { + Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset); + if (DestVal != Val) { + LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset " + << Offset << " of " << *GV << ".\n"); + return false; + } + ++Offset; + --Len; } + + LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; } if (II->isLifetimeStartOrEnd()) { diff --git a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll --- a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll +++ b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -globalopt < %s | FileCheck %s -@llvm.global_ctors = appending global [8 x { i32, ptr, ptr }] [ +target datalayout = "p1:32:32" + +@llvm.global_ctors = appending global [9 x { i32, ptr, ptr }] [ { i32, ptr, ptr } { i32 65535, ptr @ctor0, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor1, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor2, ptr null }, @@ -9,11 +11,12 @@ { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor5, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor6, ptr null }, - { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null } + { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }, + { i32, ptr, ptr } { i32 65535, ptr @ctor8, ptr null } ] ;. -; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor6, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }] +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor3, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }] ; CHECK: @[[G0:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } zeroinitializer ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 0, i32 0, i32 1 } ; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 1, i32 0, i32 0 } @@ -22,11 +25,11 @@ ; CHECK: @[[G5:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i16, i32 } { i16 0, i32 1 } ; CHECK: @[[G6:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } { i32 -1, i32 -1 } ; CHECK: @[[G7:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } { i32 -1, i32 1 } +; CHECK: @[[G8:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr addrspace(1) global { i32, i32 } zeroinitializer ;. ; memset of all-zero global @g0 = global { i32, i32 } zeroinitializer - define internal void @ctor0() { call void @llvm.memset.p0.i64(ptr @g0, i8 0, i64 8, i1 false) ret void @@ -52,6 +55,10 @@ @g3 = global { i32, i32 } { i32 0, i32 1 } define internal void @ctor3() { +; CHECK-LABEL: @ctor3( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false) ret void } @@ -60,11 +67,17 @@ @g4 = global { i32, i32 } { i32 0, i32 undef } define internal void @ctor4() { +; CHECK-LABEL: @ctor4( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false) ret void } ; memset including padding bytes +; FIXME: We still incorrectly optimize the memset away here, even though code +; might access the padding. @g5 = global { i16, i32 } { i16 0, i32 1 } define internal void @ctor5() { @@ -76,10 +89,6 @@ @g6 = global { i32, i32 } { i32 -1, i32 -1 } define internal void @ctor6() { -; CHECK-LABEL: @ctor6( -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g6, i8 -1, i64 8, i1 false) -; CHECK-NEXT: ret void -; call void @llvm.memset.p0.i64(ptr @g6, i8 -1, i64 8, i1 false) ret void } @@ -96,6 +105,14 @@ ret void } +; memset of zero value in differently-sized address space +@g8 = addrspace(1) global { i32, i32 } zeroinitializer + +define internal void @ctor8() { + call void @llvm.memset.p0.i64(ptr addrspacecast (ptr addrspace(1) @g8 to ptr), i8 0, i64 8, i1 false) + ret void +} + declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly }