Index: llvm/trunk/lib/Analysis/CaptureTracking.cpp
===================================================================
--- llvm/trunk/lib/Analysis/CaptureTracking.cpp
+++ llvm/trunk/lib/Analysis/CaptureTracking.cpp
@@ -300,7 +300,7 @@
           Worklist.push_back(&UU);
     }
     break;
-  case Instruction::ICmp:
+  case Instruction::ICmp: {
     // Don't count comparisons of a no-alias return value against null as
     // captures. This allows us to ignore comparisons of malloc results
    // with null, for example.
@@ -309,11 +309,19 @@
       if (CPN->getType()->getAddressSpace() == 0)
         if (isNoAliasCall(V->stripPointerCasts()))
           break;
+    // Comparing against a value loaded from a global variable does not count
+    // as a capture: since the pointer does not escape, its value cannot have
+    // been guessed and stored separately in a global variable.
+    unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0;
+    auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex));
+    if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+      break;
     // Otherwise, be conservative. There are crazy ways to capture pointers
     // using comparisons.
     if (Tracker->captured(U))
       return;
     break;
+  }
   default:
     // Something else - be conservative and say it is captured.
     if (Tracker->captured(U))
Index: llvm/trunk/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/trunk/lib/Analysis/InstructionSimplify.cpp
+++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -1922,10 +1923,10 @@
 // If the C and C++ standards are ever made sufficiently restrictive in this
 // area, it may be possible to update LLVM's semantics accordingly and reinstate
 // this optimization.
-static Constant *computePointerICmp(const DataLayout &DL,
-                                    const TargetLibraryInfo *TLI,
-                                    CmpInst::Predicate Pred, Value *LHS,
-                                    Value *RHS) {
+static Constant *
+computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
+                   const DominatorTree *DT, CmpInst::Predicate Pred,
+                   const Instruction *CxtI, Value *LHS, Value *RHS) {
   // First, skip past any trivial no-ops.
   LHS = LHS->stripPointerCasts();
   RHS = RHS->stripPointerCasts();
@@ -2081,6 +2082,21 @@
         (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
       return ConstantInt::get(GetCompareTy(LHS),
                               !CmpInst::isTrueWhenEqual(Pred));
+
+    // Fold comparisons for non-escaping pointers even if the allocation call
+    // cannot be elided. We cannot fold such a comparison against null, since
+    // malloc may itself return null. Either operand may be the allocation call.
+    Value *MI = nullptr;
+    if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonNullAt(RHS, CxtI, DT, TLI))
+      MI = LHS;
+    else if (isAllocLikeFn(RHS, TLI) &&
+             llvm::isKnownNonNullAt(LHS, CxtI, DT, TLI))
+      MI = RHS;
+    // FIXME: We should also fold the compare when the pointer escapes, as long
+    // as the compare dominates the point of escape.
+    if (MI && !PointerMayBeCaptured(MI, true, true))
+      return ConstantInt::get(GetCompareTy(LHS),
+                              CmpInst::isFalseWhenEqual(Pred));
   }
 
   // Otherwise, fail.
@@ -3027,7 +3043,7 @@
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
   if (LHS->getType()->isPointerTy())
-    if (Constant *C = computePointerICmp(Q.DL, Q.TLI, Pred, LHS, RHS))
+    if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
       return C;
 
   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
Index: llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll
+++ llvm/trunk/test/Transforms/InstCombine/compare-unescaped.ll
@@ -21,24 +21,66 @@
   %cmp = icmp ne i32* %bc, %lgp
   ret i1 %cmp
 ; CHECK-LABEL: compare_global_trivialne
-; CHECK: ret i1 true
+; CHECK: ret i1 true
 }
 
 ; Although the %m is marked nocapture in the deopt operand in call to function f,
 ; we cannot remove the alloc site: call to malloc
-; FIXME: The comparison should fold to false irrespective of whether the call to malloc can be elided or not
+; The comparison should fold to false irrespective of whether the call to malloc can be elided or not
 declare void @f()
-define i32 @compare_and_call_with_deopt() {
+define i1 @compare_and_call_with_deopt() {
 ; CHECK-LABEL: compare_and_call_with_deopt
   %m = call i8* @malloc(i64 24)
   %bc = bitcast i8* %m to i32*
-  %lgp = load i32*, i32** @gp, align 8
-  %cmp = icmp eq i32* %bc, %lgp
-  %rt = zext i1 %cmp to i32
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp eq i32* %lgp, %bc
   tail call void @f() [ "deopt"(i8* %m) ]
-  ret i32 %rt
-; CHECK: ret i32 %rt
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+; Same function as above with a deopt operand in the call to function f, but the comparison is NE
+define i1 @compare_ne_and_call_with_deopt() {
+; CHECK-LABEL: compare_ne_and_call_with_deopt
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp ne i32* %lgp, %bc
+  tail call void @f() [ "deopt"(i8* %m) ]
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+; Same function as above, but the global is not marked nonnull, so we cannot fold the comparison
+define i1 @compare_ne_global_maybe_null() {
+; CHECK-LABEL: compare_ne_global_maybe_null
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp
+  %cmp = icmp ne i32* %lgp, %bc
+  tail call void @f() [ "deopt"(i8* %m) ]
+  ret i1 %cmp
+; CHECK: ret i1 %cmp
+}
+
+; FIXME: The comparison should fold to false since %m escapes (via the call to
+; function @escape) only after the comparison.
+declare void @escape(i8*)
+define i1 @compare_and_call_after() {
+; CHECK-LABEL: compare_and_call_after
+  %m = call i8* @malloc(i64 24)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp = icmp eq i32* %bc, %lgp
+  br i1 %cmp, label %escape_call, label %just_return
+
+escape_call:
+  call void @escape(i8* %m)
+  ret i1 true
+
+just_return:
+  ret i1 %cmp
 }
 
 define i1 @compare_distinct_mallocs() {
@@ -63,7 +105,7 @@
 }
 
 ; the compare is folded to true since the folding compare looks through bitcasts.
-; call to malloc and the bitcast instructions are elided after that since there are no uses of the malloc
+; The malloc call for %m cannot be elided since it is used in the call to function f.
 define i1 @compare_samepointer_escaped() {
   %m = call i8* @malloc(i64 4)
   %bc = bitcast i8* %m to i32*
@@ -77,6 +119,34 @@
 ; CHECK: ret i1 true
 }
 
+; Technically, we can fold the %cmp2 comparison even though %m escapes through
+; the ret statement, since `ret` terminates the function and control cannot flow
+; from the ret back to the compare.
+; FIXME: Folding this %cmp2 when %m escapes through the ret could be an issue with
+; cross-thread data dependencies, since capture tracking does not distinguish
+; between atomic and non-atomic loads.
+define i8* @compare_ret_escape(i8* %c) {
+  %m = call i8* @malloc(i64 4)
+  %n = call i8* @malloc(i64 4)
+  %cmp = icmp eq i8* %n, %c
+  br i1 %cmp, label %retst, label %chk
+
+retst:
+  ret i8* %m
+
+chk:
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+  %cmp2 = icmp eq i32* %bc, %lgp
+  br i1 %cmp2, label %retst, label %chk2
+
+chk2:
+  ret i8* %n
+; CHECK-LABEL: compare_ret_escape
+; CHECK: %cmp = icmp eq i8* %n, %c
+; CHECK: %cmp2 = icmp eq i32* %bc, %lgp
+}
+
 ; The malloc call for %m cannot be elided since it is used in the call to function f.
 ; However, the cmp can be folded to true as %n doesnt escape and %m, %n are distinct allocations
 define i1 @compare_distinct_pointer_escape() {
@@ -90,3 +160,5 @@
 ; CHECK-NEXT: tail call void @f() [ "deopt"(i8* %m) ]
 ; CHECK-NEXT: ret i1 true
 }
+
+!0 = !{}
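
For illustration, here is a minimal standalone case of the new fold, separate from the patch itself. The names @gp2 and @cmp_nonescaping_malloc and the RUN invocation are illustrative assumptions, not part of the change: one icmp operand strips to an allocation-like call that never escapes, the other is a load marked !nonnull, so the equality compare is expected to simplify to false under opt -instcombine -S (InstCombine invokes InstSimplify, which calls computePointerICmp).

; Illustrative sketch only; @gp2 and @cmp_nonescaping_malloc are hypothetical names.
@gp2 = global i32* null

declare i8* @malloc(i64)

define i1 @cmp_nonescaping_malloc() {
  %m = call i8* @malloc(i64 24)                       ; allocation-like call
  %bc = bitcast i8* %m to i32*                        ; %m only flows into the compare, so it does not escape
  %lgp = load i32*, i32** @gp2, align 8, !nonnull !0  ; other operand is known non-null
  %cmp = icmp eq i32* %bc, %lgp                       ; expected to fold to false with this patch
  ret i1 %cmp
}

!0 = !{}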