Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" @@ -898,20 +899,20 @@ return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } -Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI, - const AllocaInst *Alloca) { +Instruction *InstCombinerImpl::foldAllocCmp(ICmpInst &ICI, const Value *Alloc) { assert(ICI.isEquality() && "Cannot fold non-equality comparison."); - // It would be tempting to fold away comparisons between allocas and any - // pointer not based on that alloca (e.g. an argument). However, even - // though such pointers cannot alias, they can still compare equal. + // It would be tempting to fold away comparisons between alloca or noalias + // calls and any pointer not based on that allocation (e.g. an argument). + // However, even though such pointers cannot alias, they can still compare + // equal. // - // But LLVM doesn't specify where allocas get their memory, so if the alloca - // doesn't escape we can argue that it's impossible to guess its value, and we - // can therefore act as if any such guesses are wrong. + // But LLVM doesn't specify where allocas or noalias calls get their memory, + // so if the alloca doesn't escape we can argue that it's impossible to guess + // its value, and we can therefore act as if any such guesses are wrong. // - // The code below checks that the alloca doesn't escape, and that it's only - // used in a comparison once (the current instruction). 
The + // The code below checks that the allocation doesn't escape, and that it's + // only used in a comparison once (the current instruction). The // single-comparison-use condition ensures that we're trivially folding all // comparisons against the alloca consistently, and avoids the risk of // erroneously folding a comparison of the pointer with itself. @@ -934,7 +935,7 @@ }; CmpCaptureTracker Tracker; - PointerMayBeCaptured(Alloca, &Tracker); + PointerMayBeCaptured(Alloc, &Tracker); if (Tracker.Captured) return nullptr; @@ -6442,14 +6443,31 @@ if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I)) return NI; - // Try to optimize equality comparisons against alloca-based pointers. + // Try to optimize equality comparisons against memory allocations. if (Op0->getType()->isPointerTy() && I.isEquality()) { - assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?"); - if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0))) - if (Instruction *New = foldAllocaCmp(I, Alloca)) + assert(Op1->getType()->isPointerTy() && + "Comparing pointer with non-pointer?"); + auto CanFold = [&](const Value *UO, const Value *Op, const Value *Other) { + if (isa<AllocaInst>(UO)) + return true; + // Allocators might return null values. We can only fold comparisons to + // false if either the allocator can't return null, or we have a direct + // comparison against a non-null value. (We don't want to reason about + // hidden null checks like (gep malloc, X) == inttoptr(X) here.) 
+ if (isNoAliasCall(UO)) + return isKnownNonZero(UO, Q.DL, /*Depth*/ 0, Q.AC, &I, Q.DT) || + (UO == Op && + isKnownNonZero(Other, Q.DL, /*Depth*/ 0, Q.AC, &I, Q.DT)); + return false; + }; + + Value *UO0 = getUnderlyingObject(Op0); + if (CanFold(UO0, Op0, Op1)) + if (Instruction *New = foldAllocCmp(I, UO0)) return New; - Value *UO1 = getUnderlyingObject(Op1); + if (CanFold(UO1, Op1, Op0)) + if (Instruction *New = foldAllocCmp(I, UO1)) return New; } Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -550,7 +550,7 @@ ICmpInst::Predicate Cond, Instruction &I); Instruction *foldSelectICmp(ICmpInst::Predicate Pred, SelectInst *SI, Value *RHS, const ICmpInst &I); - Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca); + Instruction *foldAllocCmp(ICmpInst &ICI, const Value *Alloc); Instruction *foldCmpLoadFromIndexedGlobal(LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, Index: llvm/test/Transforms/InstCombine/compare-unescaped.ll =================================================================== --- llvm/test/Transforms/InstCombine/compare-unescaped.ll +++ llvm/test/Transforms/InstCombine/compare-unescaped.ll @@ -202,12 +202,9 @@ declare ptr @hidden_inttoptr() declare ptr @hidden_offset(ptr %other) -; FIXME: Missed oppurtunity define i1 @ptrtoint_single_cmp() { ; CHECK-LABEL: @ptrtoint_single_cmp( -; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr) -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %m = call ptr @malloc(i64 4) %rhs = inttoptr i64 2048 to ptr @@ -312,8 +309,7 @@ ; CHECK-LABEL: @consistent_nocapture_inttoptr( ; 
CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4) ; CHECK-NEXT: call void @unknown(ptr nocapture [[M]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr) -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %m = call ptr @malloc(i64 4) call void @unknown(ptr nocapture %m) @@ -377,15 +373,9 @@ ret i1 %cmp } -; TODO: We can fold this, but don't with the current scheme. define i1 @two_nonnull_mallocs_hidden() { ; CHECK-LABEL: @two_nonnull_mallocs_hidden( -; CHECK-NEXT: [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4) -; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4) -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[M]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[N]], i64 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %m = call nonnull ptr @malloc(i64 4) %n = call nonnull ptr @malloc(i64 4)