Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -1930,6 +1930,58 @@ return nullptr; } +// Check whether we can act as if this alloca has an unguessable address. It +// must not escape, and cannot be compared against more than once (by the cmp +// we're trying to fold). +static bool AllocaIsUnguessable(Value *V, const DataLayout &DL) { + unsigned MaxIter = 32; // Break cycles and bound to constant-time. + SmallVector<Use *, 32> Worklist; + for (Use &U : V->uses()) + Worklist.push_back(&U); + + unsigned NumCmps = 0; + while (!Worklist.empty()) { + if (Worklist.size() > MaxIter) + return false; + + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + --MaxIter; + + if (isa<GetElementPtrInst>(I) || isa<BitCastInst>(I) || isa<PHINode>(I) || + isa<SelectInst>(I)) { + // Track the uses. + } else if (isa<LoadInst>(I)) { + // Loading from the pointer doesn't escape it. + continue; + } else if (auto *SI = dyn_cast<StoreInst>(I)) { + // Storing to the pointer is fine, but storing the pointer escapes it. + if (SI->getValueOperand() == U->get()) + return false; + continue; + } else if (isa<ICmpInst>(I)) { + if (NumCmps++) + return false; // Found more than one cmp. + continue; + } else if (auto *Intrin = dyn_cast<IntrinsicInst>(I)) { + switch (Intrin->getIntrinsicID()) { + // These intrinsics don't escape or compare the pointer. + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + continue; + default: + return false; + } + } else { + return false; + } + for (Use &U : I->uses()) + Worklist.push_back(&U); + } + return true; +} + // A significant optimization not implemented here is assuming that alloca // addresses are not equal to incoming argument values. 
They don't *alias*, // as we say, but that doesn't mean they aren't equal, so we take a @@ -2122,6 +2174,16 @@ (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs))) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); + + // If an alloca cannot be observed (doesn't escape, isn't compared against + // except the current cmp), we can act as if the alloca has an unguessable + // value, and fold comparisons with other pointers to not equal. + if (isa<AllocaInst>(LHS) || isa<AllocaInst>(RHS)) { + AllocaInst *Alloca = cast<AllocaInst>(isa<AllocaInst>(LHS) ? LHS : RHS); + assert(LHS != RHS && "Self-comparison should already be folded!"); + if (AllocaIsUnguessable(Alloca, DL)) + return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); + } } // Otherwise, fail. Index: test/Transforms/InstSimplify/compare.ll =================================================================== --- test/Transforms/InstSimplify/compare.ll +++ test/Transforms/InstSimplify/compare.ll @@ -704,29 +704,112 @@ ret i1 %Y } -; It's not valid to fold a comparison of an argument with an alloca, even though -; that's tempting. An argument can't *alias* an alloca, however the aliasing rule -; relies on restrictions against guessing an object's address and dereferencing. -; There are no restrictions against guessing an object's address and comparing. +; It's not valid to fold a comparison of an argument with an alloca in the +; general case, even though that's tempting. An argument can't *alias* an +; alloca; however, the aliasing rule relies on restrictions against guessing +; an object's address and dereferencing. There are no restrictions against +; guessing an object's address and comparing. +; +; However, in cases where the alloca can never be observed (doesn't escape, +; isn't compared against except the cmp we're about to fold), we can act as +; if the alloca yields an unguessable address, and treat comparisons with +; *any* other pointer as not equal. 
define i1 @alloca_argument_compare(i64* %arg) { %alloc = alloca i64 %cmp = icmp eq i64* %arg, %alloc ret i1 %cmp - ; CHECK: alloca_argument_compare - ; CHECK: ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare + ; CHECK: ret i1 false } -; As above, but with the operands reversed. - define i1 @alloca_argument_compare_swapped(i64* %arg) { %alloc = alloca i64 %cmp = icmp eq i64* %alloc, %arg ret i1 %cmp - ; CHECK: alloca_argument_compare_swapped + ; CHECK-LABEL: alloca_argument_compare_swapped + ; CHECK: ret i1 false +} + +define i1 @alloca_argument_compare_ne(i64* %arg) { + %alloc = alloca i64 + %cmp = icmp ne i64* %arg, %alloc + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_ne + ; CHECK: ret i1 true +} + +define i1 @alloca_argument_compare_derived_ptrs(i64* %arg, i64 %x) { + %alloc = alloca i64, i64 8 + %p = getelementptr i64, i64* %arg, i64 %x + %q = getelementptr i64, i64* %alloc, i64 3 + %cmp = icmp eq i64* %p, %q + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_derived_ptrs + ; CHECK: ret i1 false +} + +declare void @escape(i64*) +define i1 @alloca_argument_compare_escaped_alloca(i64* %arg) { + %alloc = alloca i64 + call void @escape(i64* %alloc) + %cmp = icmp eq i64* %arg, %alloc + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_escaped_alloca + ; CHECK: %cmp = icmp eq i64* %arg, %alloc ; CHECK: ret i1 %cmp } +declare void @check_compares(i1, i1) +define void @alloca_argument_compare_two_compares(i64* %p) { + %q = alloca i64, i64 8 + %r = getelementptr i64, i64* %p, i64 1 + %s = getelementptr i64, i64* %q, i64 2 + %cmp1 = icmp eq i64* %p, %q + %cmp2 = icmp eq i64* %r, %s + call void @check_compares(i1 %cmp1, i1 %cmp2) + ret void + ; We will only fold if there is a single cmp. 
+ ; CHECK-LABEL: alloca_argument_compare_two_compares + ; CHECK: call void @check_compares(i1 %cmp1, i1 %cmp2) +} + +define i1 @alloca_argument_compare_escaped_through_store(i64* %arg, i64** %ptr) { + %alloc = alloca i64 + %cmp = icmp eq i64* %arg, %alloc + %p = getelementptr i64, i64* %alloc, i64 1 + store i64* %p, i64** %ptr + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_escaped_through_store + ; CHECK: %cmp = icmp eq i64* %arg, %alloc + ; CHECK: ret i1 %cmp +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) +define i1 @alloca_argument_compare_benign_instrs(i8* %arg) { + %alloc = alloca i8 + call void @llvm.lifetime.start(i64 1, i8* %alloc) + %cmp = icmp eq i8* %arg, %alloc + %x = load i8, i8* %arg + store i8 %x, i8* %alloc + call void @llvm.lifetime.end(i64 1, i8* %alloc) + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_benign_instrs + ; CHECK: ret i1 false +} + +declare i64* @allocator() +define i1 @alloca_call_compare() { + %p = alloca i64 + %q = call i64* @allocator() + %cmp = icmp eq i64* %p, %q + ret i1 %cmp + ; CHECK-LABEL: alloca_call_compare + ; CHECK: ret i1 false +} + + ; Don't assume that a noalias argument isn't equal to a global variable's ; address. This is an example where AliasAnalysis' NoAlias concept is ; different from actual pointer inequality. 
Index: test/Transforms/InstSimplify/noalias-ptr.ll =================================================================== --- test/Transforms/InstSimplify/noalias-ptr.ll +++ test/Transforms/InstSimplify/noalias-ptr.ll @@ -223,26 +223,6 @@ ret void } -define void @_Z2p2v(i32 %c) #0 { - %mStackData = alloca [10 x i32], i32 %c, align 16 - %1 = bitcast [10 x i32]* %mStackData to i8* - %2 = tail call noalias i8* @_Znam(i64 48) #4 - %3 = bitcast i8* %2 to i32* - %4 = getelementptr inbounds [10 x i32], [10 x i32]* %mStackData, i64 0, i64 0 - %5 = icmp eq i32* %3, %4 - br i1 %5, label %7, label %6 - -; CHECK-LABEL: @_Z2p2v -; CHECK: icmp -; CHECK: ret void - -;