Index: llvm/include/llvm/Analysis/TypeMetadataUtils.h =================================================================== --- llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_TYPEMETADATAUTILS_H #define LLVM_ANALYSIS_TYPEMETADATAUTILS_H +#include "llvm/IR/GlobalVariable.h" #include namespace llvm { @@ -81,6 +82,10 @@ /// target is `C`, and replaces the entire pattern with a constant zero. void replaceRelativePointerUsersWithZero(Constant *C); +// Replace the icmp instructions that use this global with a constant result +// (false or true). +void replaceICmpUsersWithFalse(GlobalVariable *GV); + } // namespace llvm #endif Index: llvm/lib/Analysis/TypeMetadataUtils.cpp =================================================================== --- llvm/lib/Analysis/TypeMetadataUtils.cpp +++ llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -236,3 +236,37 @@ replaceRelativePointerUserWithZero(U); } } + +void llvm::replaceICmpUsersWithFalse(GlobalVariable *GV) { + // Find all the icmp instructions that use this global variable. + SmallPtrSet Compares; + for (auto *U : GV->users()) { + if (auto *I = dyn_cast(U)) { + Compares.insert(I); + } + } + + // Replace them all with a constant. + for (auto *I : Compares) { + assert(I->isEquality() && "expected only equality comparisons!"); + auto *Op0 = I->getOperand(0); + auto *Op1 = I->getOperand(1); + uint64_t Result; + if (Op0 == Op1) { + // Comparisons against itself are always true. + Result = 1; + } else if (Op0 == GV || Op1 == GV) { + // Comparisons against another pointer are always false. 
+ Result = 0; + } else { + llvm_unreachable( + "expected one of the operands to be the global variable!"); + } + if (I->getPredicate() == CmpInst::Predicate::ICMP_NE) { + Result = !Result; + } + auto *V = ConstantInt::get(Type::getInt1Ty(I->getContext()), Result); + I->replaceAllUsesWith(V); + I->eraseFromParent(); + } +} Index: llvm/lib/Transforms/IPO/GlobalDCE.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -58,12 +58,53 @@ return false; } +// Returns true when V is a global with local (internal or private) linkage. +static bool isLocalGlobal(Value *V) { + auto *GV = dyn_cast(V); + if (GV && GV->hasLocalLinkage()) { + return true; + } + return false; +} + +static bool isDereferenceableOrNull(Value *O, const DataLayout &DL) { + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEP there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. Similarly, other + // dereferenceable pointers cannot be manipulated without producing + // poison. + if (auto *GEP = dyn_cast(O)) + if (GEP->isInBounds()) + return true; + bool CanBeNull, CanBeFreed; + return O->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); +} + /// Compute the set of GlobalValue that depends from V. /// The recursion stops as soon as a GlobalValue is met. void GlobalDCEPass::ComputeDependencies(Value *V, SmallPtrSetImpl &Deps) { if (auto *I = dyn_cast(V)) { Function *Parent = I->getParent()->getParent(); + if (auto *ICmp = dyn_cast(I)) { + // Check for icmp eq/ne against a local global variable. 
+ // The icmp won't keep the global alive if the pointer it compares against + // is dereferenceable_or_null: there is no way it could have been captured + // in any other way. + if (ICmp->isEquality() && !Parent->nullPointerIsDefined()) { + auto *Op0 = ICmp->getOperand(0); + auto *Op1 = ICmp->getOperand(1); + const DataLayout &DL = I->getModule()->getDataLayout(); + if (Op0->getType()->isPointerTy() && + (isLocalGlobal(Op0) || isLocalGlobal(Op1)) && + isDereferenceableOrNull(Op0, DL) && + isDereferenceableOrNull(Op1, DL)) + return; + } + } Deps.insert(Parent); } else if (auto *GV = dyn_cast(V)) { Deps.insert(GV); @@ -394,8 +435,16 @@ } NumVariables += DeadGlobalVars.size(); - for (GlobalVariable *GV : DeadGlobalVars) + for (GlobalVariable *GV : DeadGlobalVars) { + if (!GV->use_empty()) { + // Local global variables might still be referenced from an icmp, after + // we've proven that the global variable isn't captured anywhere. + // Replace icmp eq against these globals with false, and icmp ne with + // true. + replaceICmpUsersWithFalse(GV); + } EraseUnusedGlobalValue(GV); + } NumAliases += DeadAliases.size(); for (GlobalAlias *GA : DeadAliases) Index: llvm/test/Transforms/GlobalDCE/icmp.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalDCE/icmp.ll @@ -0,0 +1,79 @@ +; RUN: opt -S -passes=globaldce < %s | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@local = internal global i32 0 +@local2 = internal global i32 0 +@local3 = internal global i32 0 +@local4 = internal global i32 0 +@nonlocal = global i32 0 + +; CHECK-LABEL: @cmplocal( +; CHECK: ret i1 false +define i1 @cmplocal(ptr dereferenceable_or_null(4) %other) { + ; The typical case. 
+ %result = icmp eq ptr %other, @local + ret i1 %result +} + +; CHECK-LABEL: @cmplocalne( +; CHECK: ret i1 true +define i1 @cmplocalne(ptr dereferenceable_or_null(4) %other) { + ; When the check is negated (icmp ne), the result is true (not false). + %result = icmp ne ptr %other, @local + ret i1 %result +} + +; CHECK-LABEL: @cmplocalrev( +; CHECK: ret i1 false +define i1 @cmplocalrev(ptr dereferenceable_or_null(4) %other) { + ; Check that it also works with the operands reversed. + %result = icmp eq ptr @local, %other + ret i1 %result +} + +; CHECK-LABEL: @cmplocalself( +; CHECK: ret i1 true +define i1 @cmplocalself(ptr dereferenceable_or_null(4) %other) { + ; A comparison against itself won't make the global escape, but will always be + ; true. (This is probably also optimized by instcombine, but checking to be + ; sure). + %result = icmp eq ptr @local, @local + ret i1 %result +} + +; CHECK-LABEL: @cmpnonlocal( +; CHECK: ret i1 %result +define i1 @cmpnonlocal(ptr dereferenceable_or_null(4) %other) { + ; The optimization doesn't apply to external globals. + %result = icmp eq ptr %other, @nonlocal + ret i1 %result +} + +; CHECK-LABEL: @cmplocal2( +; CHECK: ret i1 %result +define i1 @cmplocal2(ptr %other) { + ; The optimization doesn't work without dereferenceable_or_null because then + ; we might use pointer tricks to obtain the address of @local2 anyway. + %result = icmp eq ptr %other, @local2 + ret i1 %result +} + +; CHECK-LABEL: @cmplocal3( +; CHECK: ret i1 false +define i1 @cmplocal3(ptr %other) { + ; ...but the optimization _does_ work with an inbounds gep, which provides the + ; same guarantee. + %gep = getelementptr inbounds i32, ptr %other, i64 1 + %result = icmp eq ptr %gep, @local3 + ret i1 %result +} + +; CHECK-LABEL: @cmplocal4( +; CHECK: ret i1 %result +define i1 @cmplocal4(ptr dereferenceable_or_null(4) %other) { + ; TODO: we might want to optimize this case. + %gep = getelementptr i32, ptr %other, i64 0 + %result = icmp eq ptr %gep, @local4 + ret i1 %result +}