diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -378,7 +378,8 @@
   }
 
   void removeAllAssertingVHReferences(Value *V);
-  bool eliminateAssumptions(Function &F);
+  bool hoistAssumptions(Function &F);
+  bool hoistAssumptions(BasicBlock *BB);
   bool eliminateFallThrough(Function &F);
   bool eliminateMostlyEmptyBlocks(Function &F);
   BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -509,10 +510,11 @@
     }
   }
 
-  // Get rid of @llvm.assume builtins before attempting to eliminate empty
+  // Get rid of @llvm.assume builtins (if they and their predecessors are the
+  // only real occupants of a block) before attempting to eliminate empty
   // blocks, since there might be blocks that only contain @llvm.assume calls
   // (plus arguments that we can get rid of).
-  EverMadeChange |= eliminateAssumptions(F);
+  EverMadeChange |= hoistAssumptions(F);
 
   // Eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
@@ -623,23 +625,83 @@
   return EverMadeChange;
 }
 
-bool CodeGenPrepare::eliminateAssumptions(Function &F) {
-  bool MadeChange = false;
-  for (BasicBlock &BB : F) {
-    CurInstIterator = BB.begin();
-    while (CurInstIterator != BB.end()) {
-      Instruction *I = &*(CurInstIterator++);
-      if (auto *Assume = dyn_cast<AssumeInst>(I)) {
-        MadeChange = true;
-        Value *Operand = Assume->getOperand(0);
-        Assume->eraseFromParent();
-
+/// Hoist @llvm.assume and inputs of BB into single predecessor Pred if they are
+/// the only non-terminator instructions in BB. When hoisted the original
+/// assumption %X needs to be weakened to include the possibility that the
+/// branch was not taken. That is with branch condition %Y in Pred the new
+/// assumption becomes (!%Y | %X) if BB is the true-target of Pred and (%Y | %X)
+/// if BB is the false-target of Pred.
+bool CodeGenPrepare::hoistAssumptions(BasicBlock *BB) {
+  auto *Pred = BB->getSinglePredecessor();
+  if (!Pred)
+    return false;
-        resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
-          RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
-        });
-      }
+  SetVector<Instruction *> Candidates;
+
+  for (auto It = BB->rbegin(), E = BB->rend(); It != E; ++It) {
+    auto I = &(*It);
+    // Skip the terminator.
+    if (I->isTerminator())
+      continue;
+
+    // Bail if side-effects.
+    if (I->mayHaveSideEffects() && !isa<AssumeInst>(I))
+      return false;
+
+    // Insert into Candidates either if instruction is a @llvm.assume or if all
+    // uses of the instruction are covered by the Candidates set. Otherwise
+    // bail.
+    if (isa<AssumeInst>(I))
+      Candidates.insert(I);
+    else if (!empty(I->users()) && all_of(I->users(), [&](User *U) {
+               return isa<Instruction>(U)
+                          ? Candidates.count(cast<Instruction>(U))
+                          : false;
+             }))
+      Candidates.insert(I);
+    else
+      return false;
+  }
+
+  if (!Candidates.size())
+    return false;
+
+  // If we reach this point BB contains only @llvm.assume instructions and
+  // their input. Hoist into Pred.
+
+  auto *PredBranch = dyn_cast<BranchInst>(Pred->getTerminator());
+  if (!PredBranch || PredBranch->isUnconditional())
+    return false;
+
+  // Now move Candidates into Pred. When we get to a @llvm.assume update the
+  // assume condition to depend on PredBranch->getCondition().
+  Module *M = BB->getParent()->getParent();
+  IRBuilder<> IRB(M->getContext());
+  IRB.SetInsertPoint(PredBranch);
+
+  Value *Cond = PredBranch->getCondition();
+  auto *PredTrueSucc = PredBranch->getSuccessor(0);
+  if (BB == PredTrueSucc)
+    Cond = IRB.CreateNot(Cond);
+
+  for (auto It = Candidates.rbegin(), E = Candidates.rend(); It != E; ++It) {
+    if (auto *Assume = dyn_cast<AssumeInst>(*It)) {
+      auto *OrCond = IRB.CreateOr(Cond, Assume->getOperand(0));
+      IRB.CreateAssumption(OrCond);
+      Assume->eraseFromParent();
+    } else {
+      auto *I = *It;
+      I->moveBefore(PredBranch);
     }
   }
+
+  return true;
+}
+
+bool CodeGenPrepare::hoistAssumptions(Function &F) {
+  bool MadeChange = false;
+  for (auto &BB : F)
+    MadeChange |= hoistAssumptions(&BB);
   return MadeChange;
 }
 
@@ -2166,8 +2228,6 @@
   if (II) {
     switch (II->getIntrinsicID()) {
     default: break;
-    case Intrinsic::assume:
-      llvm_unreachable("llvm.assume should have been removed already");
     case Intrinsic::experimental_widenable_condition: {
       // Give up on future widening oppurtunties so that we can fold away dead
      // paths and merge blocks before going into block-local instruction
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll b/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll
--- a/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll
@@ -5,8 +5,14 @@
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L:%.*]] = load i8, i8* [[D:%.*]], align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i8 [[L]], 0
-; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[L]], 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, i8* [[D]], i32 42
+; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i8* [[GEP]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i64 [[CALL]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[CMP]], [[CMP2]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[L]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV]]
 ;
 entry:
@@ -27,4 +33,4 @@
 }
 
 declare i64 @foo(i8*) nounwind readonly willreturn
-declare void @llvm.assume(i1 noundef) nounwind willreturn
+declare void @llvm.assume(i1) nounwind willreturn
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll
deleted file mode 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s
-
-declare void @llvm.assume(i1 noundef) nounwind willreturn
-
-; Recursively deleting dead operands of assume() may result in its next
-; instruction deleted and the iterator pointing to the next instruction
-; invalidated. This prevents the following simple loop in
-; CodeGenPrepare::optimizeBlock() unless CurInstIterator is fixed:
-;
-;   CurInstIterator = BB.begin();
-;   while (CurInstIterator != BB.end())
-;     optimizeInst(&*CurInstIterator++, ModifiedDT);
-;
-define i32 @test_assume_in_loop(i1 %cond1, i1 %cond2) {
-; CHECK-LABEL: @test_assume_in_loop(
-; CHECK-NEXT:  entry:
-entry:
-  br label %loop
-
-; CHECK: loop:
-; CHECK-NEXT:    br label %loop
-loop:
-  %cond3 = phi i1 [%cond1, %entry], [%cond4, %loop]
-  call void @llvm.assume(i1 %cond3)
-  %cond4 = icmp ult i1 %cond1, %cond2
-  br label %loop
-}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll b/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll
--- a/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll
@@ -1,12 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -codegenprepare -mtriple=x86_64-linux < %s | FileCheck %s
 ;
 ; Ensure that blocks that only contain @llvm.assume are removed completely
 ; during CodeGenPrepare.
 
-; CHECK-LABEL: @simple(
-; CHECK-NEXT:  end:
-; CHECK-NEXT:    ret void
 define void @simple(i64 %addr, i1 %assumption) {
+; CHECK-LABEL: @simple(
+; CHECK-NEXT:  end:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i64 [[ADDR:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[TMP0]], [[ASSUMPTION:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT:    ret void
+;
   %cmp1 = icmp eq i64 %addr, 0
   br i1 %cmp1, label %do_assume, label %end
 
@@ -18,11 +24,26 @@
   ret void
 }
 
-; CHECK-LABEL: @complex_assume(
-; CHECK-NEXT:  end:
-; CHECK-NEXT:    ret void
 define void @complex_assume(i64 %addr, i1 %assumption_a, i1 %assumption_b,
-                            i64 %val_a, i64 %val_b) {
+; CHECK-LABEL: @complex_assume(
+; CHECK-NEXT:  end:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i64 [[ADDR:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[TMP0]], [[ASSUMPTION_A:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[ASSUMPTION_B:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+; CHECK-NEXT:    [[VAL_XOR:%.*]] = xor i64 [[VAL_A:%.*]], [[VAL_B:%.*]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = lshr i64 [[VAL_XOR]], 7
+; CHECK-NEXT:    [[ASSUMPTION_C:%.*]] = trunc i64 [[VAL_SHIFTED]] to i1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i1 [[TMP0]], [[ASSUMPTION_C]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP3]])
+; CHECK-NEXT:    [[ASSUMPTION_D:%.*]] = call i1 @readonly_func(i64 [[VAL_B]])
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP0]], [[ASSUMPTION_D]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP4]])
+; CHECK-NEXT:    ret void
+;
+                            i64 %val_a, i64 %val_b) {
   %cmp1 = icmp eq i64 %addr, 0
   br i1 %cmp1, label %do_assume, label %end
 
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll b/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll
--- a/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s
 
 ; The ret instruction can be duplicated into BB case2 even though there is an
@@ -5,13 +6,32 @@
 
 @ptr = external global i8*, align 8
 
-; CHECK: %ret1 = tail call i8* @qux()
-; CHECK-NEXT: ret i8* %ret1
-
-; CHECK: %ret2 = tail call i8* @bar()
-; CHECK-NEXT: ret i8* %ret2
-
 define i8* @foo(i64 %size, i64 %v1, i64 %v2) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[SIZE:%.*]], 1025
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_END:%.*]], label [[CASE1:%.*]]
+; CHECK:       case1:
+; CHECK-NEXT:    [[RET1:%.*]] = tail call i8* @qux()
+; CHECK-NEXT:    ret i8* [[RET1]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[CASE3:%.*]], label [[CASE2:%.*]]
+; CHECK:       case2:
+; CHECK-NEXT:    [[RET2:%.*]] = tail call i8* @bar()
+; CHECK-NEXT:    br label [[EXIT1:%.*]]
+; CHECK:       case3:
+; CHECK-NEXT:    [[RET3:%.*]] = load i8*, i8** @ptr, align 8
+; CHECK-NEXT:    br label [[EXIT1]]
+; CHECK:       exit1:
+; CHECK-NEXT:    [[RETVAL1:%.*]] = phi i8* [ [[RET2]], [[CASE2]] ], [ [[RET3]], [[CASE3]] ]
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i8* [[RETVAL1]], null
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP3]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A]]) #[[ATTR2]]
+; CHECK-NEXT:    ret i8* [[RETVAL1]]
+;
 entry:
   %a = alloca i8
   call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a) nounwind