diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -452,14 +452,11 @@ /// in the worst case, the whole use list of a value. bool hasOneUser() const; - /// Return true if there is exactly one use of this value that cannot be - /// dropped. - /// - /// This is specialized because it is a common request and does not require - /// traversing the whole use list. - Use *getSingleUndroppableUse(); - const Use *getSingleUndroppableUse() const { - return const_cast(this)->getSingleUndroppableUse(); + /// Return true if there is exactly one unique user of this value that cannot be + /// dropped (that user can have multiple uses of this value). + User *getUniqueUndroppableUser(); + const User *getUniqueUndroppableUser() const { + return const_cast(this)->getUniqueUndroppableUser(); } /// Return true if there this value. diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -164,13 +164,13 @@ static bool isUnDroppableUser(const User *U) { return !U->isDroppable(); } -Use *Value::getSingleUndroppableUse() { - Use *Result = nullptr; +User *Value::getUniqueUndroppableUser() { + User *Result = nullptr; for (Use &U : uses()) { if (!U.getUser()->isDroppable()) { - if (Result) + if (Result && Result != U.getUser()) return nullptr; - Result = &U; + Result = U.getUser(); } } return Result; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3659,7 +3659,7 @@ /// instruction past all of the instructions between it and the end of its /// block. static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { - assert(I->getSingleUndroppableUse() && "Invariants didn't hold!"); + assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!"); BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. @@ -3818,18 +3818,26 @@ [this](Instruction *I) -> Optional { if (!EnableCodeSinking) return None; - Use *SingleUse = I->getSingleUndroppableUse(); - if (!SingleUse) + auto *UserInst = dyn_cast_or_null(I->getUniqueUndroppableUser()); + if (!UserInst) return None; BasicBlock *BB = I->getParent(); - Instruction *UserInst = cast(SingleUse->getUser()); - BasicBlock *UserParent; - - // Get the block the use occurs in. - if (PHINode *PN = dyn_cast(UserInst)) - UserParent = PN->getIncomingBlock(*SingleUse); - else + BasicBlock *UserParent = nullptr; + + // Special handling for Phi nodes - get the block the use occurs in. + if (PHINode *PN = dyn_cast(UserInst)) { + for (unsigned i =0; i < PN->getNumIncomingValues(); i++) + if (PN->getIncomingValue(i) == I) { + // Bail out if we have more than one use in PN. We don't do any + // sophisticed analysis (i.e finding NearestCommonDominator of these + // use blocks). + if (UserParent && UserParent != PN->getIncomingBlock(i)) + return None; + UserParent = PN->getIncomingBlock(i); + } + assert(UserParent && "expected to find user block!"); + } else UserParent = UserInst->getParent(); // Try sinking to another block. If that block is unreachable, then do diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -161,8 +161,9 @@ if (wouldInstructionBeTriviallyDead(Inst)) { if (RK.WasOn->use_empty()) return false; - Use *SingleUse = RK.WasOn->getSingleUndroppableUse(); - if (SingleUse && SingleUse->getUser() == InstBeingModified) + auto *UniqueUser = + dyn_cast_or_null(RK.WasOn->getUniqueUndroppableUser()); + if (UniqueUser == InstBeingModified) return false; } return true; diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll --- a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll @@ -56,9 +56,9 @@ define void @PR33765(i8 %beth) { ; CHECK-LABEL: @PR33765( -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32 ; CHECK-NEXT: br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]] ; CHECK: if.then9: +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]] ; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[MUL]] to i16 diff --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll --- a/llvm/test/Transforms/InstCombine/sink_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll @@ -56,6 +56,7 @@ declare i32 @bar() define i32 @test3(i32* nocapture readonly %P, i32 %i) { +; CHECK-LABEL: test3( entry: %idxprom = sext i32 %i to i64 %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom @@ -77,3 +78,79 @@ %sum.0 = phi i32 [ %add, %sw.bb ], [ 0, %entry ] ret i32 %sum.0 } + +declare i32 @foo(i32, i32) +; Two uses in a single user. We can still sink the instruction. +define i32 @test4(i32 %A, i32 %B, i1 %C) { +; CHECK-LABEL: test4( +entry: + %tmp.2 = sdiv i32 %A, %B ; [#uses=1] + %tmp.9 = add i32 %B, %A ; [#uses=1] + br i1 %C, label %then, label %endif + +then: ; preds = %entry +; CHECK-LABEL: then: +; CHECK: %tmp.9 = add i32 %B, %A +; CHECK-NEXT: %res = call i32 @foo(i32 %tmp.9, i32 %tmp.9) + %res = call i32 @foo(i32 %tmp.9, i32 %tmp.9) + ret i32 %res + +endif: ; preds = %entry +; CHECK-LABEL: endif: +; CHECK: sdiv i32 +; CHECK-NEXT: ret i32 %tmp.2 + ret i32 %tmp.2 +} + +; Two uses in a single user (phi node). We just bail out. +define i32 @test5(i32* nocapture readonly %P, i32 %i, i1 %cond) { +; CHECK-LABEL: test5 +entry: +; CHECK-LABEL: entry +; CHECK: load i32, i32* %arrayidx + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + br i1 %cond, label %dispatchBB, label %sw.epilog + +dispatchBB: + %add = add nsw i32 %i, %i + br label %sw.epilog + +sw.bb: ; preds = %entry, %entry +; CHECK-LABEL: sw.bb: +; CHECK-NEXT: br label %sw.epilog + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb + %sum.0 = phi i32 [ %0, %sw.bb ], [ %add, %dispatchBB ], [ %0, %entry ] + ret i32 %sum.0 +} + +; Multiple uses but from same BB. We can sink. +define i32 @test6(i32* nocapture readonly %P, i32 %i, i1 %cond) { +; CHECK-LABEL: test6 +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %i, %i + br label %dispatchBB + +dispatchBB: +; CHECK-LABEL: dispatchBB +; CHECK: load i32, i32* %arrayidx + switch i32 %i, label %sw.bb [ + i32 5, label %sw.epilog + i32 2, label %sw.epilog + ] + +sw.bb: ; preds = %entry, %entry +; CHECK-LABEL: sw.bb: +; CHECK-NEXT: br label %sw.epilog + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb + %sum.0 = phi i32 [ %add, %sw.bb ], [ %0, %dispatchBB ], [ %0, %dispatchBB ] + ret i32 %sum.0 +}