Index: llvm/lib/Analysis/CaptureTracking.cpp
===================================================================
--- llvm/lib/Analysis/CaptureTracking.cpp
+++ llvm/lib/Analysis/CaptureTracking.cpp
@@ -433,12 +433,6 @@
           break;
         }
       }
-      // Comparison against value stored in global variable. Given the pointer
-      // does not escape, its value cannot be guessed and stored separately in a
-      // global variable.
-      auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
-      if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
-        break;
       // Otherwise, be conservative. There are crazy ways to capture pointers
       // using comparisons.
       if (Tracker->captured(U))
Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2604,8 +2604,6 @@
                                     const SimplifyQuery &Q) {
   const DataLayout &DL = Q.DL;
   const TargetLibraryInfo *TLI = Q.TLI;
-  const DominatorTree *DT = Q.DT;
-  const Instruction *CxtI = Q.CxtI;
   const InstrInfoQuery &IIQ = Q.IIQ;
 
   // First, skip past any trivial no-ops.
@@ -2711,24 +2709,6 @@
         (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
       return ConstantInt::get(GetCompareTy(LHS),
                               !CmpInst::isTrueWhenEqual(Pred));
-
-    // Fold comparisons for non-escaping pointer even if the allocation call
-    // cannot be elided. We cannot fold malloc comparison to null. Also, the
-    // dynamic allocation call could be either of the operands. Note that
-    // the other operand can not be based on the alloc - if it were, then
-    // the cmp itself would be a capture.
-    Value *MI = nullptr;
-    if (isAllocLikeFn(LHS, TLI) &&
-        llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT))
-      MI = LHS;
-    else if (isAllocLikeFn(RHS, TLI) &&
-             llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT))
-      MI = RHS;
-    // FIXME: We should also fold the compare when the pointer escapes, but the
-    // compare dominates the pointer escape
-    if (MI && !PointerMayBeCaptured(MI, true, true))
-      return ConstantInt::get(GetCompareTy(LHS),
-                              CmpInst::isFalseWhenEqual(Pred));
   }
 
   // Otherwise, fail.
Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -14,6 +14,8 @@
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -1005,75 +1007,50 @@
   return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
 }
 
-Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
-                                             const AllocaInst *Alloca) {
+Instruction *InstCombinerImpl::foldAllocCmp(ICmpInst &ICI,
+                                            const Value *Allocation) {
   assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
 
-  // It would be tempting to fold away comparisons between allocas and any
-  // pointer not based on that alloca (e.g. an argument). However, even
-  // though such pointers cannot alias, they can still compare equal.
+  // It would be tempting to fold away comparisons between allocas or noalias
+  // calls and any pointer not based on that allocation (e.g. an argument).
+  // However, even though such pointers cannot alias, they can still compare
+  // equal.
   //
-  // But LLVM doesn't specify where allocas get their memory, so if the alloca
-  // doesn't escape we can argue that it's impossible to guess its value, and we
-  // can therefore act as if any such guesses are wrong.
+  // But LLVM doesn't specify where allocas or noalias calls get their memory,
+  // so if the allocation doesn't escape we can argue that it's impossible to
+  // guess its value, and we can therefore act as if any such guesses are wrong.
   //
-  // The code below checks that the alloca doesn't escape, and that it's only
-  // used in a comparison once (the current instruction). The
+  // The code below checks that the allocation doesn't escape, and that it's
+  // only used in a comparison once (the current instruction). The
   // single-comparison-use condition ensures that we're trivially folding all
   // comparisons against the alloca consistently, and avoids the risk of
   // erroneously folding a comparison of the pointer with itself.
 
-  unsigned MaxIter = 32; // Break cycles and bound to constant-time.
+  struct CustomCaptureTracker : public CaptureTracker {
+    explicit CustomCaptureTracker() {}
 
-  SmallVector<const Use *, 32> Worklist;
-  for (const Use &U : Alloca->uses()) {
-    if (Worklist.size() >= MaxIter)
-      return nullptr;
-    Worklist.push_back(&U);
-  }
+    void tooManyUses() override { Captured = true; }
 
-  unsigned NumCmps = 0;
-  while (!Worklist.empty()) {
-    assert(Worklist.size() <= MaxIter);
-    const Use *U = Worklist.pop_back_val();
-    const Value *V = U->getUser();
-    --MaxIter;
+    bool captured(const Use *U) override {
+      NumCmps++;
+      if (NumCmps == 1)
+        // Exactly one compare use seen
+        return false;
 
-    if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
-        isa<SelectInst>(V)) {
-      // Track the uses.
-    } else if (isa<LoadInst>(V)) {
-      // Loading from the pointer doesn't escape it.
-      continue;
-    } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
-      // Storing *to* the pointer is fine, but storing the pointer escapes it.
-      if (SI->getValueOperand() == U->get())
-        return nullptr;
-      continue;
-    } else if (isa<ICmpInst>(V)) {
-      if (NumCmps++)
-        return nullptr; // Found more than one cmp.
-      continue;
-    } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
-      switch (Intrin->getIntrinsicID()) {
-        // These intrinsics don't escape or compare the pointer. Memset is safe
-        // because we don't allow ptrtoint. Memcpy and memmove are safe because
-        // we don't allow stores, so src cannot point to V.
-        case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
-        case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
-          continue;
-        default:
-          return nullptr;
-      }
-    } else {
-      return nullptr;
-    }
-    for (const Use &U : V->uses()) {
-      if (Worklist.size() >= MaxIter)
-        return nullptr;
-      Worklist.push_back(&U);
+      Captured = true;
+      return true;
     }
-  }
+
+    unsigned NumCmps = 0;
+    bool Captured = false;
+  };
+
+  CustomCaptureTracker Tracker;
+  const unsigned MaxIter = 32; // Break cycles and bound to constant-time.
+  PointerMayBeCaptured(Allocation, &Tracker, MaxIter);
+
+  if (Tracker.Captured)
+    return nullptr;
 
   auto *Res = ConstantInt::get(ICI.getType(),
                                !CmpInst::isTrueWhenEqual(ICI.getPredicate()));
@@ -6055,15 +6032,33 @@
   if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
     return NI;
 
-  // Try to optimize equality comparisons against alloca-based pointers.
+  // Try to optimize equality comparisons against memory allocations
+  // (e.g. alloca or noalias calls)
   if (Op0->getType()->isPointerTy() && I.isEquality()) {
     assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
-    if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
-      if (Instruction *New = foldAllocaCmp(I, Alloca))
+    auto *UO0 = getUnderlyingObject(Op0);
+    if (isa<AllocaInst>(UO0))
+      if (Instruction *New = foldAllocCmp(I, UO0))
         return New;
-    if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
-      if (Instruction *New = foldAllocaCmp(I, Alloca))
+    auto *UO1 = getUnderlyingObject(Op1);
+    if (isa<AllocaInst>(UO1))
+      if (Instruction *New = foldAllocCmp(I, UO1))
        return New;
+
+    if (isNoAliasCall(UO0) || isNoAliasCall(UO1)) {
+      // noalias results are allowed to be null. We need to make sure that
+      // either a) the result isn't null at the use, or b) the result being
+      // null still results in it not being equal to the other side.
+      if (isKnownNonZero(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT) ||
+          isKnownNonZero(Op1, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) {
+        if (isNoAliasCall(UO0))
+          if (Instruction *New = foldAllocCmp(I, UO0))
+            return New;
+        if (isNoAliasCall(UO1))
+          if (Instruction *New = foldAllocCmp(I, UO1))
+            return New;
+      }
+    }
   }
 
   if (Instruction *Res = foldICmpBitCast(I))
Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -650,7 +650,7 @@
 
   Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                            ICmpInst::Predicate Cond, Instruction &I);
-  Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca);
+  Instruction *foldAllocCmp(ICmpInst &ICI, const Value *Allocation);
   Instruction *foldCmpLoadFromIndexedGlobal(LoadInst *LI,
                                             GetElementPtrInst *GEP,
                                             GlobalVariable *GV, CmpInst &ICI,
Index: llvm/test/Transforms/InstCombine/compare-alloca.ll
===================================================================
--- llvm/test/Transforms/InstCombine/compare-alloca.ll
+++ llvm/test/Transforms/InstCombine/compare-alloca.ll
@@ -238,14 +238,12 @@
 
 declare void @unknown(i8*)
 
-; TODO: Missing optimization
 define i1 @consistent_nocapture_inttoptr() {
 ; CHECK-LABEL: @consistent_nocapture_inttoptr(
 ; CHECK-NEXT:    [[M1:%.*]] = alloca [4 x i8], align 1
 ; CHECK-NEXT:    [[M1_SUB:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[M1]], i32 0, i32 0
 ; CHECK-NEXT:    call void @unknown(i8* nocapture nonnull [[M1_SUB]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[M1_SUB]], inttoptr (i64 2048 to i8*)
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %m = alloca i8, i32 4
   call void @unknown(i8* nocapture %m)
@@ -270,15 +268,12 @@
 }
 
 @gp = global i32* null, align 8
-; TODO: Missing optimization
 define i1 @consistent_nocapture_through_global() {
 ; CHECK-LABEL: @consistent_nocapture_through_global(
 ; CHECK-NEXT:    [[M1:%.*]] = alloca i32, align 1
 ; CHECK-NEXT:    [[M1_SUB:%.*]] = bitcast i32* [[M1]] to i8*
 ; CHECK-NEXT:    call void @unknown(i8* nocapture nonnull [[M1_SUB]])
-; CHECK-NEXT:    [[LGP:%.*]] = load i32*, i32** @gp, align 8, !nonnull !0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[M1]], [[LGP]]
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %m = alloca i8, i32 4
   call void @unknown(i8* nocapture %m)
Index: llvm/test/Transforms/InstCombine/compare-unescaped.ll
===================================================================
--- llvm/test/Transforms/InstCombine/compare-unescaped.ll
+++ llvm/test/Transforms/InstCombine/compare-unescaped.ll
@@ -216,12 +216,9 @@
 declare i8* @hidden_inttoptr()
 declare i8* @hidden_offset(i8* %other)
 
-; FIXME: Missed oppurtunity
 define i1 @ptrtoint_single_cmp() {
 ; CHECK-LABEL: @ptrtoint_single_cmp(
-; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %m = call i8* @malloc(i64 4)
   %rhs = inttoptr i64 2048 to i8*
@@ -302,9 +299,6 @@
   ret void
 }
 
-; FIXME: This appears correct, but the current implementation relies
-; on visiting both cmps in the same pass. We may have an simplification order
-; under which one is missed, and that would be a bug.
 define void @neg_consistent_fold4() {
 ; CHECK-LABEL: @neg_consistent_fold4(
 ; CHECK-NEXT:    call void @witness(i1 false, i1 false)
@@ -329,8 +323,7 @@
 ; CHECK-LABEL: @consistent_nocapture_inttoptr(
 ; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
 ; CHECK-NEXT:    call void @unknown(i8* nocapture [[M]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %m = call i8* @malloc(i64 4)
   call void @unknown(i8* nocapture %m)
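For illustration only, not part of the patch to apply: a minimal IR sketch of the fold the new noalias-call path enables, adapted from the ptrtoint_single_cmp test above; the exact @malloc declaration and the @example function name are assumptions. Because %m is a non-escaping noalias allocation whose only comparison use is this icmp, and the other operand is a known non-null constant, InstCombine is expected to fold the compare to false:

  declare noalias i8* @malloc(i64)

  define i1 @example() {
    %m = call i8* @malloc(i64 4)       ; non-escaping noalias allocation
    %rhs = inttoptr i64 2048 to i8*    ; arbitrary non-null guessed address
    %cmp = icmp eq i8* %m, %rhs        ; the only compare use of %m
    ret i1 %cmp                        ; expected to become: ret i1 false
  }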