Index: llvm/trunk/include/llvm/Transforms/Utils/Local.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/Local.h
+++ llvm/trunk/include/llvm/Transforms/Utils/Local.h
@@ -356,6 +356,10 @@
 /// Unknown metadata is removed.
 void combineMetadataForCSE(Instruction *K, const Instruction *J);
 
+// Replace each use of 'From' with 'To', if that use does not belong to the
+// basic block where 'From' is defined. Returns the number of replacements made.
+unsigned replaceNonLocalUsesWith(Instruction *From, Value *To);
+
 /// Replace each use of 'From' with 'To' if that use is dominated by
 /// the given edge. Returns the number of replacements made.
 unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
Index: llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
+++ llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
@@ -253,6 +253,35 @@
   return EverChanged;
 }
 
+// Replace uses of Cond with ToVal when safe to do so. If all uses are
+// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
+// because we may incorrectly replace uses when guards/assumes are uses of
+// `Cond` and we use the guards/assumes to reason about the `Cond` value
+// at the end of the block. RAUW unconditionally replaces all uses,
+// including the guards/assumes themselves and the uses before the
+// guard/assume.
+static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
+  assert(Cond->getType() == ToVal->getType());
+  auto *BB = Cond->getParent();
+  // We can unconditionally replace all uses in non-local blocks (i.e. uses
+  // strictly dominated by BB), since LVI information is true from the
+  // terminator of BB.
+  replaceNonLocalUsesWith(Cond, ToVal);
+  for (Instruction &I : reverse(*BB)) {
+    // Reached the Cond whose uses we are trying to replace, so there are no
+    // more uses.
+    if (&I == Cond)
+      break;
+    // We only replace uses in instructions that are guaranteed to reach the
+    // end of BB, where we know Cond is ToVal.
+    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+      break;
+    I.replaceUsesOfWith(Cond, ToVal);
+  }
+  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
+    Cond->eraseFromParent();
+}
+
 /// Return the cost of duplicating a piece of this block from first non-phi
 /// and before StopAt instruction to thread across it. Stop scanning the block
 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
@@ -833,13 +862,19 @@
       CondBr->eraseFromParent();
       if (CondCmp->use_empty())
         CondCmp->eraseFromParent();
-      // TODO: We can safely replace *some* uses of the CondInst if it has
+      // We can safely replace *some* uses of the CondInst if it has
       // exactly one value as returned by LVI. RAUW is incorrect in the
       // presence of guards and assumes, that have the `Cond` as the use. This
      // is because we use the guards/assume to reason about the `Cond` value
       // at the end of block, but RAUW unconditionally replaces all uses
       // including the guards/assumes themselves and the uses before the
       // guard/assume.
+      else if (CondCmp->getParent() == BB) {
+        auto *CI = Ret == LazyValueInfo::True ?
+          ConstantInt::getTrue(CondCmp->getType()) :
+          ConstantInt::getFalse(CondCmp->getType());
+        ReplaceFoldableUses(CondCmp, CI);
+      }
 
       return true;
     }
@@ -1325,13 +1360,16 @@
     if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
       if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
         CondInst->eraseFromParent();
-      // TODO: We can safely replace *some* uses of the CondInst if it has
+      // We can safely replace *some* uses of the CondInst if it has
      // exactly one value as returned by LVI. RAUW is incorrect in the
       // presence of guards and assumes, that have the `Cond` as the use. This
       // is because we use the guards/assume to reason about the `Cond` value
       // at the end of block, but RAUW unconditionally replaces all uses
       // including the guards/assumes themselves and the uses before the
       // guard/assume.
+      else if (OnlyVal && OnlyVal != MultipleVal &&
+               CondInst->getParent() == BB)
+        ReplaceFoldableUses(CondInst, OnlyVal);
     }
     return true;
   }
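To make the safety argument in the comment above concrete, consider a
hand-written IR sketch (illustrative only; it is not part of the patch or its
tests):

declare void @f(i1)               ; may throw or never return
declare void @llvm.assume(i1)

define i32 @sketch(i32* %p) {
  %notnull = icmp ne i32* %p, null
  call void @f(i1 %notnull)       ; executes before the assume
  call void @llvm.assume(i1 %notnull)
  br i1 %notnull, label %ok, label %err

ok:
  ret i32 0

err:
  ret i32 1
}

LVI uses the assume to prove %notnull true at the terminator, so the branch
folds to `br label %ok`. A blind RAUW would also rewrite @f's operand (and the
assume's own operand) to true, which is wrong: if @f never returns when handed
false, the assume is never executed and %notnull may legitimately be false at
that call. ReplaceFoldableUses' backward walk stops at the call to @f exactly
because @f is not guaranteed to transfer execution to its successor.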
Index: llvm/trunk/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/Local.cpp
+++ llvm/trunk/lib/Transforms/Utils/Local.cpp
@@ -1796,6 +1796,23 @@
   return Count;
 }
 
+unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
+  assert(From->getType() == To->getType());
+  auto *BB = From->getParent();
+  unsigned Count = 0;
+
+  for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+       UI != UE;) {
+    Use &U = *UI++;
+    auto *I = cast<Instruction>(U.getUser());
+    if (I->getParent() == BB)
+      continue;
+    U.set(To);
+    ++Count;
+  }
+  return Count;
+}
+
 unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
                                         DominatorTree &DT,
                                         const BasicBlockEdge &Root) {
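A minimal sketch of the new helper's contract on hypothetical IR (the names
and constants are illustrative, not from the tests): with From = %add and
To = i32 96, only the use outside %bb is rewritten.

define i32 @sketch(i32 %v) {
bb:
  %add = add i32 %v, 64
  %local = mul i32 %add, %add   ; same-block use: left untouched
  br label %succ

succ:
  %ret = add i32 %add, 1        ; non-local use: becomes add i32 96, 1
  ret i32 %ret
}

The legality argument stays with the caller; in ReplaceFoldableUses it is that
the LVI fact holds from BB's terminator, and since a definition dominates its
uses, the end of BB dominates every use of From outside BB.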
Index: llvm/trunk/test/Transforms/JumpThreading/assume.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/assume.ll
+++ llvm/trunk/test/Transforms/JumpThreading/assume.ll
@@ -59,12 +59,12 @@
 @g = external global i32
 
 ; Check that we do prove a fact using an assume within the block.
-; FIXME: We can fold the assume based on the semantics of assume.
-; CHECK-LABEL: @can_fold_assume
-; CHECK: %notnull = icmp ne i32* %array, null
-; CHECK-NEXT: call void @llvm.assume(i1 %notnull)
-; CHECK-NEXT: ret void
+; We can fold the assume based on the semantics of assume.
 define void @can_fold_assume(i32* %array) {
+; CHECK-LABEL: @can_fold_assume
+; CHECK-NOT: call void @llvm.assume
+; CHECK-NOT: br
+; CHECK: ret void
   %notnull = icmp ne i32* %array, null
   call void @llvm.assume(i1 %notnull)
   br i1 %notnull, label %normal, label %error
@@ -80,19 +80,128 @@
 declare void @f(i1)
 declare void @exit()
 ; We can fold the assume but not the uses before the assume.
-define void @dont_fold_incorrectly(i32* %array) {
-; CHECK-LABEL:@dont_fold_incorrectly
+define void @cannot_fold_use_before_assume(i32* %array) {
+; CHECK-LABEL:@cannot_fold_use_before_assume
 ; CHECK: @f(i1 %notnull)
 ; CHECK-NEXT: exit()
-; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NOT: assume
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
+  call void @exit()
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @dummy(i1) nounwind argmemonly
+define void @can_fold_some_use_before_assume(i32* %array) {
+
+; CHECK-LABEL:@can_fold_some_use_before_assume
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: @dummy(i1 true)
+; CHECK-NOT: assume
 ; CHECK-NEXT: ret void
   %notnull = icmp ne i32* %array, null
   call void @f(i1 %notnull)
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+
+}
+
+; FIXME: We can fold the assume and all uses before/after it,
+; because the trapping exit call comes after the assume.
+define void @can_fold_assume_and_all_uses(i32* %array) {
+; CHECK-LABEL:@can_fold_assume_and_all_uses
+; CHECK: @dummy(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: exit()
+; CHECK-NEXT: %notnull2 = or i1 true, false
+; CHECK-NEXT: @f(i1 %notnull2)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
   call void @exit()
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  %notnull2 = or i1 %notnull, false
+  call void @f(i1 %notnull2)
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @fz(i8)
+; FIXME: We can fold the assume to true, and the use after the assume, but we
+; do not do so currently, because of the function call after the assume.
+define void @can_fold_assume2(i32* %array) {
+
+; CHECK-LABEL:@can_fold_assume2
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 %notnull to i8
+; CHECK-NEXT: @f(i1 %notnull)
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  %znotnull = zext i1 %notnull to i8
+  call void @f(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  call void @f(i1 %notnull)
+  call void @fz(i8 %znotnull)
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @llvm.experimental.guard(i1, ...)
+; FIXME: We can fold the assume to true, but we do not do so
+; because of the guard following the assume.
+define void @can_fold_assume3(i32* %array) {
+
+; CHECK-LABEL:@can_fold_assume3
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: guard(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 true to i8
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
   call void @llvm.assume(i1 %notnull)
+  call void(i1, ...) @llvm.experimental.guard(i1 %notnull) [ "deopt"() ]
+  %znotnull = zext i1 %notnull to i8
   br i1 %notnull, label %normal, label %error
 
 normal:
+  call void @f(i1 %notnull)
+  call void @fz(i8 %znotnull)
   ret void
 
 error:
@@ -100,6 +209,26 @@
   ret void
 }
+
+; We can fold all uses of the condition and remove it.
+define void @can_fold_assume4(i32* %array) {
+; CHECK-LABEL: can_fold_assume4
+; CHECK-NOT: notnull
+; CHECK: dummy(i1 true)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @exit()
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
 
 ; Function Attrs: nounwind
 declare void @llvm.assume(i1) #1
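The dividing line in the tests above is isGuaranteedToTransferExecutionToSuccessor:
the backward walk may step over an instruction only if execution is guaranteed
to continue past it. A condensed, hand-written sketch (not one of the tests;
@opaque and @harmless are made-up declarations):

declare void @llvm.assume(i1)
declare void @opaque(i1)                        ; may throw or never return
declare void @harmless(i1) nounwind argmemonly  ; guaranteed to return

define void @sketch(i32* %p) {
  %c = icmp ne i32* %p, null
  call void @opaque(i1 %c)        ; walk stops here: this use is kept
  call void @harmless(i1 %c)      ; becomes @harmless(i1 true)
  call void @llvm.assume(i1 %c)   ; transfers execution; its use folds too
  br i1 %c, label %a, label %b

a:
  ret void

b:
  ret void
}

The nounwind and argmemonly attributes are what let ValueTracking vouch that
@harmless returns; a call that might unwind or never return ends the walk,
which is why the FIXME cases above stay unfolded for now.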
Index: llvm/trunk/test/Transforms/JumpThreading/fold-not-thread.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/fold-not-thread.ll
+++ llvm/trunk/test/Transforms/JumpThreading/fold-not-thread.ll
@@ -133,10 +133,10 @@
   ret void
 }
 
-; FIXME: Make sure we can do the RAUW for %add...
+; Make sure we can do the RAUW for %add...
 ;
 ; CHECK-LABEL: @rauw_if_possible(
-; CHECK: call void @f4(i32 %add)
+; CHECK: call void @f4(i32 96)
 define void @rauw_if_possible(i32 %value) nounwind {
 entry:
   %cmp = icmp eq i32 %value, 32
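The guards.ll updates below turn on one further distinction: a guard may
deoptimize instead of falling through, so it is not guaranteed to transfer
execution, and the backward walk must stop at it. Folding the guard's own
condition would silently turn `must deoptimize` into `never deoptimizes`. A
hand-written sketch (not one of the tests):

declare void @llvm.experimental.guard(i1, ...)

define void @sketch(i32 %i) {
  %c = icmp eq i32 %i, 0
  call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
  br i1 %c, label %a, label %b    ; LVI: %c is true once the guard passes

a:
  ret void

b:
  ret void
}

JumpThreading may still fold the branch to `br label %a`, and any uses of %c
strictly between the guard and the terminator, but `guard(i1 %c)` itself must
survive unchanged.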
Index: llvm/trunk/test/Transforms/JumpThreading/guards.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/guards.ll
+++ llvm/trunk/test/Transforms/JumpThreading/guards.ll
@@ -182,86 +182,89 @@
   ret void
 }
 
-declare void @never_called()
+declare void @never_called(i1)
 
-; Assume the guard is always taken and we deoptimize, so we never reach the
-; branch below that guard. We should *never* change the behaviour of a guard from
-; `must deoptimize` to `may deoptimize`, since this affects the program
-; semantics.
+; LVI uses the guard to identify the value of %c2 in the branch as true; we
+; cannot replace the guard with guard(true & c1).
 define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) {
 ; CHECK-LABEL: dont_fold_guard
-; CHECK: experimental.guard(i1 %wide.chk)
-
-entry:
-  br label %BBPred
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
+  %c1 = icmp ult i32 %i, %length
+  %c2 = icmp eq i32 %i, 0
+  %wide.chk = and i1 %c1, %c2
+  call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+  br i1 %c2, label %BB1, label %BB2
 
-BBPred:
-  %cond = icmp eq i8* %addr, null
-  br i1 %cond, label %zero, label %not_zero
+BB1:
+  call void @never_called(i1 %c2)
   ret void
 
-zero:
-  unreachable
+BB2:
+  ret void
+}
 
-not_zero:
+declare void @dummy(i1) nounwind argmemonly
+; Same as @dont_fold_guard, but there is a use immediately after the guard and
+; before the branch. We can fold that use.
+define void @dont_fold_guard2(i8* %addr, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard2
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
   %c1 = icmp ult i32 %i, %length
   %c2 = icmp eq i32 %i, 0
   %wide.chk = and i1 %c1, %c2
   call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
-  br i1 %c2, label %unreachedBB2, label %unreachedBB1
+  call void @dummy(i1 %c2)
+  br i1 %c2, label %BB1, label %BB2
 
-unreachedBB2:
-  call void @never_called()
+BB1:
+  call void @never_called(i1 %c2)
   ret void
 
-unreachedBB1:
+BB2:
   ret void
 }
 
-
 ; same as dont_fold_guard1 but condition %cmp is not an instruction.
 ; We cannot fold the guard under any circumstance.
 ; FIXME: We can merge unreachableBB2 into not_zero.
-define void @dont_fold_guard2(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard2
+define void @dont_fold_guard3(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard3
 ; CHECK: guard(i1 %cmp)
-
-entry:
-  br label %BBPred
-
-BBPred:
-  %cond = icmp eq i8* %addr, null
-  br i1 %cond, label %zero, label %not_zero
-
-zero:
-  unreachable
-
-not_zero:
   call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
-  br i1 %cmp, label %unreachedBB2, label %unreachedBB1
+  br i1 %cmp, label %BB1, label %BB2
 
-unreachedBB2:
-  call void @never_called()
+BB1:
+  call void @never_called(i1 %cmp)
   ret void
 
-unreachedBB1:
+BB2:
   ret void
 }
 
+declare void @f(i1)
 ; Same as dont_fold_guard1 but use switch instead of branch.
 ; triggers source code `ProcessThreadableEdges`.
-declare void @f(i1)
-define void @dont_fold_guard3(i1 %cmp1, i32 %i) nounwind {
-; CHECK-LABEL: dont_fold_guard3
+define void @dont_fold_guard4(i1 %cmp1, i32 %i) nounwind {
+; CHECK-LABEL: dont_fold_guard4
 ; CHECK-LABEL: L2:
 ; CHECK-NEXT: %cmp = icmp eq i32 %i, 0
 ; CHECK-NEXT: guard(i1 %cmp)
-; CHECK-NEXT: @f(i1 %cmp)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: @f(i1 true)
 ; CHECK-NEXT: ret void
 entry:
   br i1 %cmp1, label %L0, label %L3
 
 L0:
   %cmp = icmp eq i32 %i, 0
   call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void @dummy(i1 %cmp)
   switch i1 %cmp, label %L3 [
     i1 false, label %L1
     i1 true, label %L2