Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2981,6 +2981,18 @@ return nullptr; } +static bool isProfitableToSink(CallInst &CI) { + // If the function is called some nonnull argument, we should not sink it. + // Later passes can be more powerful (e.g. optimize null checks) + for (auto &Arg : CI.getCalledFunction()->args()) { + auto *FArg = dyn_cast(CI.getArgOperand(Arg.getArgNo())); + if (FArg && FArg->hasNonNullAttr()) { + return false; + } + } + return true; +} + /// Try to move the specified instruction from its current block into the /// beginning of DestBlock, which can only happen if it's safe to move the /// instruction past all of the instructions between it and the end of its @@ -2995,8 +3007,8 @@ return false; // Do not sink alloca instructions out of the entry block. - if (isa(I) && I->getParent() == - &DestBlock->getParent()->getEntryBlock()) + if (isa(I) && + I->getParent() == &DestBlock->getParent()->getEntryBlock()) return false; // Do not sink into catchswitch blocks. @@ -3005,10 +3017,11 @@ // Do not sink convergent call instructions. if (auto *CI = dyn_cast(I)) { - if (CI->isConvergent()) + if (CI->isConvergent() || !isProfitableToSink(*CI)) return false; } - // We can only sink load instructions if there is nothing between the load and + // We can only sink load instructions if there is nothing between the load + // and // the end of block that could change the value. if (I->mayReadFromMemory()) { for (BasicBlock::iterator Scan = I->getIterator(), @@ -3021,7 +3034,8 @@ I->moveBefore(&*InsertPos); ++NumSunkInst; - // Also sink all related debug uses from the source basic block. Otherwise we + // Also sink all related debug uses from the source basic block. Otherwise + // we // get debug use before the def. SmallVector DbgUsers; findDbgUsers(DbgUsers, I); @@ -3032,105 +3046,110 @@ } } return true; -} + } -bool InstCombiner::run() { - while (!Worklist.isEmpty()) { - Instruction *I = Worklist.RemoveOne(); - if (I == nullptr) continue; // skip null values. + bool InstCombiner::run() { + while (!Worklist.isEmpty()) { + Instruction *I = Worklist.RemoveOne(); + if (I == nullptr) + continue; // skip null values. - // Check to see if we can DCE the instruction. - if (isInstructionTriviallyDead(I, &TLI)) { - LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); - eraseInstFromFunction(*I); - ++NumDeadInst; - MadeIRChange = true; - continue; - } + // Check to see if we can DCE the instruction. + if (isInstructionTriviallyDead(I, &TLI)) { + LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); + eraseInstFromFunction(*I); + ++NumDeadInst; + MadeIRChange = true; + continue; + } - if (!DebugCounter::shouldExecute(VisitCounter)) - continue; + if (!DebugCounter::shouldExecute(VisitCounter)) + continue; - // Instruction isn't dead, see if we can constant propagate it. - if (!I->use_empty() && - (I->getNumOperands() == 0 || isa(I->getOperand(0)))) { - if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) { - LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I - << '\n'); + // Instruction isn't dead, see if we can constant propagate it. + if (!I->use_empty() && + (I->getNumOperands() == 0 || isa(I->getOperand(0)))) { + if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) { + LLVM_DEBUG(dbgs() + << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); - // Add operands to the worklist. - replaceInstUsesWith(*I, C); - ++NumConstProp; - if (isInstructionTriviallyDead(I, &TLI)) - eraseInstFromFunction(*I); - MadeIRChange = true; - continue; + // Add operands to the worklist. + replaceInstUsesWith(*I, C); + ++NumConstProp; + if (isInstructionTriviallyDead(I, &TLI)) + eraseInstFromFunction(*I); + MadeIRChange = true; + continue; + } } - } - // In general, it is possible for computeKnownBits to determine all bits in - // a value even when the operands are not all constants. - Type *Ty = I->getType(); - if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { - KnownBits Known = computeKnownBits(I, /*Depth*/0, I); - if (Known.isConstant()) { - Constant *C = ConstantInt::get(Ty, Known.getConstant()); - LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C - << " from: " << *I << '\n'); + // In general, it is possible for computeKnownBits to determine all bits + // in + // a value even when the operands are not all constants. + Type *Ty = I->getType(); + if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { + KnownBits Known = computeKnownBits(I, /*Depth*/ 0, I); + if (Known.isConstant()) { + Constant *C = ConstantInt::get(Ty, Known.getConstant()); + LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C + << " from: " << *I << '\n'); - // Add operands to the worklist. - replaceInstUsesWith(*I, C); - ++NumConstProp; - if (isInstructionTriviallyDead(I, &TLI)) - eraseInstFromFunction(*I); - MadeIRChange = true; - continue; + // Add operands to the worklist. + replaceInstUsesWith(*I, C); + ++NumConstProp; + if (isInstructionTriviallyDead(I, &TLI)) + eraseInstFromFunction(*I); + MadeIRChange = true; + continue; + } } - } - // See if we can trivially sink this instruction to a successor basic block. - if (I->hasOneUse()) { - BasicBlock *BB = I->getParent(); - Instruction *UserInst = cast(*I->user_begin()); - BasicBlock *UserParent; + // See if we can trivially sink this instruction to a successor basic + // block. + if (I->hasOneUse()) { + BasicBlock *BB = I->getParent(); + Instruction *UserInst = cast(*I->user_begin()); + BasicBlock *UserParent; - // Get the block the use occurs in. - if (PHINode *PN = dyn_cast(UserInst)) - UserParent = PN->getIncomingBlock(*I->use_begin()); - else - UserParent = UserInst->getParent(); + // Get the block the use occurs in. + if (PHINode *PN = dyn_cast(UserInst)) + UserParent = PN->getIncomingBlock(*I->use_begin()); + else + UserParent = UserInst->getParent(); - if (UserParent != BB) { - bool UserIsSuccessor = false; - // See if the user is one of our successors. - for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) - if (*SI == UserParent) { - UserIsSuccessor = true; - break; - } + if (UserParent != BB) { + bool UserIsSuccessor = false; + // See if the user is one of our successors. + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; + ++SI) + if (*SI == UserParent) { + UserIsSuccessor = true; + break; + } - // If the user is one of our immediate successors, and if that successor - // only has us as a predecessors (we'd have to split the critical edge - // otherwise), we can keep going. - if (UserIsSuccessor && UserParent->getUniquePredecessor()) { - // Okay, the CFG is simple enough, try to sink this instruction. - if (TryToSinkInstruction(I, UserParent)) { - LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); - MadeIRChange = true; - // We'll add uses of the sunk instruction below, but since sinking - // can expose opportunities for it's *operands* add them to the - // worklist - for (Use &U : I->operands()) - if (Instruction *OpI = dyn_cast(U.get())) - Worklist.Add(OpI); + // If the user is one of our immediate successors, and if that + // successor + // only has us as a predecessors (we'd have to split the critical edge + // otherwise), we can keep going. + if (UserIsSuccessor && UserParent->getUniquePredecessor()) { + // Okay, the CFG is simple enough, try to sink this instruction. + if (TryToSinkInstruction(I, UserParent)) { + LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); + MadeIRChange = true; + // We'll add uses of the sunk instruction below, but since sinking + // can expose opportunities for it's *operands* add them to the + // worklist + for (Use &U : I->operands()) + if (Instruction *OpI = dyn_cast(U.get())) + Worklist.Add(OpI); + } } } } - } - // Now that we have an instruction, try combining it to simplify it. - Builder.SetInsertPoint(I); - Builder.SetCurrentDebugLocation(I->getDebugLoc()); + // Now that we have an instruction, try combining it to simplify it. + Builder.SetInsertPoint(I); + Builder.SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; Index: test/Transforms/InstCombine/no-sink-nonnull-args.ll =================================================================== --- test/Transforms/InstCombine/no-sink-nonnull-args.ll +++ test/Transforms/InstCombine/no-sink-nonnull-args.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @no_sink_nonnull(i8* nonnull readonly %s) { +; CHECK-LABEL: @no_sink_nonnull( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i8* nonnull [[S:%.*]]) +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = tail call i32 @foo(i8* nonnull %s) + %tobool = icmp eq i8* %s, null + br i1 %tobool, label %if.then, label %if.end + +if.then: + tail call void @abort() + unreachable + +if.end: + ret i32 %call +} + +define i32 @sink(i8* readonly %s) { +; CHECK-LABEL: @sink( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[S:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @abort() +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i8* nonnull [[S]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = tail call i32 @foo(i8* %s) + %tobool = icmp eq i8* %s, null + br i1 %tobool, label %if.then, label %if.end + +if.then: + tail call void @abort() + unreachable + +if.end: + ret i32 %call +} + +; Function Attrs: argmemonly nounwind readonly +declare i32 @foo(i8* nocapture) local_unnamed_addr #1 +declare void @abort() + +attributes #1 = { argmemonly nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }