Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2789,7 +2789,25 @@ /// instruction past all of the instructions between it and the end of its /// block. static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { - assert(I->hasOneUse() && "Invariants didn't hold!"); + // verify that all uses are in the same block. + DEBUG( + BasicBlock *UsersParent = nullptr; + int UseCount = 0; + for (const Use &U : I->uses()) { + if (++UseCount > 4) + llvm_unreachable("Instruction has too many uses to be sunk."); + Instruction *UserInst = cast(U.getUser()); + BasicBlock *CurrentUserParent; + // Get the block the use occurs in. + if (PHINode *PN = dyn_cast(UserInst)) + CurrentUserParent = PN->getIncomingBlock(U); + else + CurrentUserParent = UserInst->getParent(); + if (UsersParent == nullptr) + UsersParent = CurrentUserParent; + else if (CurrentUserParent != UsersParent) + llvm_unreachable("Instruction has uses in different blocks."); + }); // Cannot move control-flow-involving, volatile loads, vaarg, etc. if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() || @@ -2879,42 +2897,50 @@ } // See if we can trivially sink this instruction to a successor basic block. - if (I->hasOneUse()) { - BasicBlock *BB = I->getParent(); - Instruction *UserInst = cast(*I->user_begin()); - BasicBlock *UserParent; - + // First see if we have a small number of uses, all in the same block. + bool UsesInOneBlock = false; + BasicBlock *UsersParent = nullptr; + int MaxUseCount = 4; + BasicBlock *BB = I->getParent(); + + for (const Use &U : I->uses()) { + if (--MaxUseCount < 0) { + UsesInOneBlock = false; + break; + } + Instruction *UserInst = cast(U.getUser()); + BasicBlock *CurrentUserParent; // Get the block the use occurs in. if (PHINode *PN = dyn_cast(UserInst)) - UserParent = PN->getIncomingBlock(*I->use_begin()); + CurrentUserParent = PN->getIncomingBlock(U); else - UserParent = UserInst->getParent(); - - if (UserParent != BB) { - bool UserIsSuccessor = false; - // See if the user is one of our successors. - for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) - if (*SI == UserParent) { - UserIsSuccessor = true; - break; - } + CurrentUserParent = UserInst->getParent(); + if (UsersParent == nullptr) { + UsersParent = CurrentUserParent; + // If the user's block is one of our immediate successors, and if that + // successor only has us as a predecessors (we'd have to split the + // critical edge otherwise), we can keep going. + if (UsersParent == BB || + UsersParent->getUniquePredecessor() != BB) + break; + UsesInOneBlock = true; + } else if (CurrentUserParent != UsersParent) { + UsesInOneBlock = false; + break; + } + } - // If the user is one of our immediate successors, and if that successor - // only has us as a predecessors (we'd have to split the critical edge - // otherwise), we can keep going. - if (UserIsSuccessor && UserParent->getUniquePredecessor()) { - // Okay, the CFG is simple enough, try to sink this instruction. - if (TryToSinkInstruction(I, UserParent)) { - DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); - MadeIRChange = true; - // We'll add uses of the sunk instruction below, but since sinking - // can expose opportunities for it's *operands* add them to the - // worklist - for (Use &U : I->operands()) - if (Instruction *OpI = dyn_cast(U.get())) - Worklist.Add(OpI); - } - } + if (UsesInOneBlock) { + // Okay, the CFG is simple enough, try to sink this instruction. + if (TryToSinkInstruction(I, UsersParent)) { + DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); + MadeIRChange = true; + // We'll add uses of the sunk instruction below, but since sinking + // can expose opportunities for it's *operands* add them to the + // worklist + for (Use &U : I->operands()) + if (Instruction *OpI = dyn_cast(U.get())) + Worklist.Add(OpI); } } Index: test/Transforms/InstCombine/sink-multiple-use.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/sink-multiple-use.ll @@ -0,0 +1,40 @@ +; RUN: opt -instcombine -S -o - %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @sink_instr_with_2_uses(i32* %a, i32* %b) local_unnamed_addr { +entry: + br label %for.cond.outer + +for.cond.outer: ; preds = %for.end.outer, %entry + %j.for.cond.outer = phi i32 [ 10, %entry ], [ %j.for.end.outer, %for.end.outer ] + %shl = shl i32 1, %j.for.cond.outer + %sunk_sub = add nsw i32 %shl, -1 + %cmp = icmp slt i32 %shl, 1 + br i1 %cmp, label %for.inner.ph, label %for.end.outer + +; CHECK-LABEL: for.inner.ph: +; CHECK-NEXT: %sunk_sub = add nsw i32 %shl, -1 +for.inner.ph: ; preds = %for.cond.outer + %0 = load i32, i32* %a, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %for.inner.top, label %for.inner.bottom + +for.inner.top: ; preds = %for.inner.top, %for.inner.ph + %i.top = phi i32 [ %i.add.top, %for.inner.top ], [ %sunk_sub, %for.inner.ph ] + %i.add.top = add nsw i32 %i.top, 1 + %cmp.top = icmp slt i32 %i.add.top, 0 + br i1 %cmp.top, label %for.inner.top, label %for.end.outer + +for.inner.bottom: ; preds = %for.inner.bottom, %for.inner.ph + %i.bottom = phi i32 [ %i.add.bottom, %for.inner.bottom ], [ %sunk_sub, %for.inner.ph ] + %i.add.bottom = add nsw i32 %i.bottom, 2 + %cmp.bottom = icmp slt i32 %i.add.bottom, 0 + br i1 %cmp.bottom, label %for.inner.bottom, label %for.end.outer + +for.end.outer: ; preds = %for.cond.outer, %for.inner.top, %for.inner.bottom + %storevalue = phi i32 [0, %for.cond.outer], [ %i.add.top, %for.inner.top ], [ %i.add.bottom, %for.inner.bottom ] + %j.for.end.outer = add nsw i32 %j.for.cond.outer, 1 + store i32 %storevalue, i32* %b, align 4 + br label %for.cond.outer +}