diff --git a/llvm/include/llvm/IR/KnowledgeRetention.h b/llvm/include/llvm/IR/KnowledgeRetention.h
--- a/llvm/include/llvm/IR/KnowledgeRetention.h
+++ b/llvm/include/llvm/IR/KnowledgeRetention.h
@@ -115,6 +115,16 @@
                                 U->getOperandNo());
 }
 
+/// Return true iff the operand bundles of the provided llvm.assume don't
+/// contain any valuable information. This is true when:
+///  - The operand bundle is empty
+///  - The operand bundle only contains information about dropped values or
+///    constant folded values.
+///
+/// The argument to the call of llvm.assume may still be useful even if the
+/// function returned true.
+bool isAssumeWithEmptyBundle(CallInst &Assume);
+
 //===----------------------------------------------------------------------===//
 // Utilities for testing
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/KnowledgeRetention.cpp b/llvm/lib/IR/KnowledgeRetention.cpp
--- a/llvm/lib/IR/KnowledgeRetention.cpp
+++ b/llvm/lib/IR/KnowledgeRetention.cpp
@@ -266,6 +266,18 @@
   return Result;
 }
 
+bool llvm::isAssumeWithEmptyBundle(CallInst &CI) {
+  IntrinsicInst &Assume = cast<IntrinsicInst>(CI);
+  assert(Assume.getIntrinsicID() == Intrinsic::assume &&
+         "this function is intended to be used on llvm.assume");
+  // Bundles tagged "ignore" carry no knowledge; the assume counts as empty
+  // when no bundle has any other tag.
+  return none_of(Assume.bundle_op_infos(),
+                 [](const CallBase::BundleOpInfo &BOI) {
+                   return BOI.Tag->getKey() != "ignore";
+                 });
+}
+
 PreservedAnalyses AssumeBuilderPass::run(Function &F,
                                          FunctionAnalysisManager &AM) {
   for (Instruction &I : instructions(F))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -48,6 +48,7 @@
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/KnowledgeRetention.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PatternMatch.h"
@@ -4078,7 +4079,7 @@
     // then this one is redundant, and should be removed.
     KnownBits Known(1);
     computeKnownBits(IIOperand, Known, 0, II);
-    if (Known.isAllOnes())
+    if (Known.isAllOnes() && isAssumeWithEmptyBundle(*II))
       return eraseInstFromFunction(*II);
 
     // Update the cache of affected values for this assumption (we might be
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3317,7 +3317,7 @@
 /// instruction past all of the instructions between it and the end of its
 /// block.
 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
-  assert(I->hasOneUse() && "Invariants didn't hold!");
+  assert(I->getSingleUndroppableUse() && "Invariants didn't hold!");
   BasicBlock *SrcBlock = I->getParent();
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -3350,6 +3350,18 @@
     if (Scan->mayWriteToMemory())
       return false;
   }
+
+  // Droppable uses (e.g. in llvm.assume) located outside of DestBlock may no
+  // longer be dominated by I once it is sunk, so drop them. Uses inside
+  // DestBlock stay valid because I is inserted at its first insertion point.
+  // FIXME: We could keep the droppable uses that are still dominated by the
+  // new position.
+  I->dropDroppableUses([DestBlock](const Use *U) {
+    if (auto *UserInst = dyn_cast<Instruction>(U->getUser()))
+      return UserInst->getParent() != DestBlock;
+    return true;
+  });
+
   BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
   I->moveBefore(&*InsertPos);
   ++NumSunkInst;
@@ -3473,44 +3485,47 @@
     }
 
     // See if we can trivially sink this instruction to a successor basic block.
-    if (EnableCodeSinking && I->hasOneUse()) {
-      BasicBlock *BB = I->getParent();
-      Instruction *UserInst = cast<Instruction>(*I->user_begin());
-      BasicBlock *UserParent;
-
-      // Get the block the use occurs in.
-      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
-        UserParent = PN->getIncomingBlock(*I->use_begin());
-      else
-        UserParent = UserInst->getParent();
-
-      if (UserParent != BB) {
-        bool UserIsSuccessor = false;
-        // See if the user is one of our successors.
-        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
-          if (*SI == UserParent) {
-            UserIsSuccessor = true;
-            break;
-          }
+    if (EnableCodeSinking)
+      if (Use *SingleUse = I->getSingleUndroppableUse()) {
+        BasicBlock *BB = I->getParent();
+        Instruction *UserInst = cast<Instruction>(SingleUse->getUser());
+        BasicBlock *UserParent;
+
+        // Get the block the use occurs in. For a PHI this is the incoming
+        // block of the single undroppable use, not of an arbitrary first use.
+        if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+          UserParent = PN->getIncomingBlock(*SingleUse);
+        else
+          UserParent = UserInst->getParent();
+
+        if (UserParent != BB) {
+          bool UserIsSuccessor = false;
+          // See if the user is one of our successors.
+          for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E;
+               ++SI)
+            if (*SI == UserParent) {
+              UserIsSuccessor = true;
+              break;
+            }
 
-        // If the user is one of our immediate successors, and if that successor
-        // only has us as a predecessors (we'd have to split the critical edge
-        // otherwise), we can keep going.
-        if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
-          // Okay, the CFG is simple enough, try to sink this instruction.
-          if (TryToSinkInstruction(I, UserParent)) {
-            LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
-            MadeIRChange = true;
-            // We'll add uses of the sunk instruction below, but since sinking
-            // can expose opportunities for it's *operands* add them to the
-            // worklist
-            for (Use &U : I->operands())
-              if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
-                Worklist.push(OpI);
+          // If the user is one of our immediate successors, and if that
+          // successor only has us as a predecessors (we'd have to split the
+          // critical edge otherwise), we can keep going.
+          if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
+            // Okay, the CFG is simple enough, try to sink this instruction.
+            if (TryToSinkInstruction(I, UserParent)) {
+              LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+              MadeIRChange = true;
+              // We'll add uses of the sunk instruction below, but since sinking
+              // can expose opportunities for it's *operands* add them to the
+              // worklist
+              for (Use &U : I->operands())
+                if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+                  Worklist.push(OpI);
+            }
           }
         }
       }
-    }
 
     // Now that we have an instruction, try combining it to simplify it.
     Builder.SetInsertPoint(I);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -59,6 +59,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/KnowledgeRetention.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
@@ -411,7 +412,8 @@
     // true are operationally no-ops. In the future we can consider more
     // sophisticated tradeoffs for guards considering potential for check
     // widening, but for now we keep things simple.
-    if (II->getIntrinsicID() == Intrinsic::assume ||
+    if ((II->getIntrinsicID() == Intrinsic::assume &&
+         isAssumeWithEmptyBundle(*II)) ||
         II->getIntrinsicID() == Intrinsic::experimental_guard) {
       if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
         return !Cond->isZero();
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
--- a/llvm/test/Transforms/InstCombine/assume.ll
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -269,9 +269,9 @@
 ; CHECK-LABEL: @nonnull3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
 ; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
 ; CHECK:       taken:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       not_taken:
@@ -398,6 +398,121 @@
   ret i32 %t2
 }
 
+; Sinking %cmp into %taken is expected to drop the assume left in %not_taken.
+define i1 @nonnull3A(i32** %a, i1 %control) {
+; CHECK-LABEL: @nonnull3A(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i1 true
+; CHECK:       not_taken:
+; CHECK-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
+; CHECK-NEXT:    ret i1 [[RVAL_2]]
+;
+entry:
+  %load = load i32*, i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  call void @llvm.assume(i1 %cmp)
+  ret i1 %cmp
+not_taken:
+  call void @llvm.assume(i1 %cmp)
+  %rval.2 = icmp sgt i32* %load, null
+  ret i1 %rval.2
+}
+
+; Same as @nonnull3A, but the assumes also carry "nonnull" operand bundles.
+define i1 @nonnull3B(i32** %a, i1 %control) {
+; CHECK-LABEL: @nonnull3B(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]]) [ "nonnull"(i32* [[LOAD]]), "nonnull"(i1 [[CMP]]) ]
+; CHECK-NEXT:    ret i1 true
+; CHECK:       not_taken:
+; CHECK-NEXT:    ret i1 [[CONTROL]]
+;
+entry:
+  %load = load i32*, i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)]
+  ret i1 %cmp
+not_taken:
+  call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)]
+  ret i1 %control
+}
+
+declare i1 @tmp1(i1)
+
+; The only undroppable use of %cmp is the call to @tmp1 in %taken.
+define i1 @nonnull3C(i32** %a, i1 %control) {
+; CHECK-LABEL: @nonnull3C(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT:    [[CMP2:%.*]] = call i1 @tmp1(i1 [[CMP]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 [[CMP2]]
+; CHECK:       not_taken:
+; CHECK-NEXT:    ret i1 [[CONTROL]]
+;
+entry:
+  %load = load i32*, i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  %cmp2 = call i1 @tmp1(i1 %cmp)
+  br label %exit
+exit:
+  ; FIXME: this shouldn't be dropped because it is still dominated by the new position of %load
+  call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)]
+  ret i1 %cmp2
+not_taken:
+  call void @llvm.assume(i1 %cmp)
+  ret i1 %control
+}
+
+; Dropped bundle entries become "ignore"; the unrelated %control entry is kept.
+define i1 @nonnull3D(i32** %a, i1 %control) {
+; CHECK-LABEL: @nonnull3D(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT:    [[CMP2:%.*]] = call i1 @tmp1(i1 [[CMP]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 [[CMP2]]
+; CHECK:       not_taken:
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "ignore"(i32* undef), "ignore"(i1 undef), "nonnull"(i1 [[CONTROL]]) ]
+; CHECK-NEXT:    ret i1 [[CONTROL]]
+;
+entry:
+  %load = load i32*, i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  %cmp2 = call i1 @tmp1(i1 %cmp)
+  br label %exit
+exit:
+  ret i1 %cmp2
+not_taken:
+  call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp), "nonnull"(i1 %control)]
+  ret i1 %control
+}
+
 declare void @llvm.dbg.value(metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}