Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3713,15 +3713,17 @@ /// beginning of DestBlock, which can only happen if it's safe to move the /// instruction past all of the instructions between it and the end of its /// block. -static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { +static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock, + TargetLibraryInfo &TLI) { assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!"); BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() || + if (isa(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || I->isTerminator()) return false; + // Do not sink static or dynamic alloca instructions. Static allocas must // remain in the entry block, and dynamic allocas must not be sunk in between // a stacksave / stackrestore pair, which would incorrectly shorten its @@ -3738,6 +3740,43 @@ if (CI->isConvergent()) return false; } + + // Unless we can prove that the memory write isn't visibile except on the + // path we're sinking to, we must bail. + if (I->mayWriteToMemory()) { + // Check for case where the call writes to an otherwise dead alloca. This + // shows up for unused out-params in idiomatic C/C++ code. + auto *CB = cast(I); + if (!CB) + // TODO: handle e.g. store to alloca here - only worth doing if we extend + // to allow reload along used path as described below. Otherwise, this + // is simply a store to a dead allocation which will be removed. + return false; + Optional Dest = MemoryLocation::getForDest(CB, TLI); + if (!Dest) + return false; + auto *AI = dyn_cast(Dest->Ptr->stripPointerCasts()); + if (!AI) + // TODO: allow malloc? + return false; + // TODO: allow memory access dominated by move point? Note that since AI + // could have a reference to itself captured by the call, we would need to + // account for cycles in doing so. + SmallVector AllocaUsers(AI->users()); + while (!AllocaUsers.empty()) { + auto *UserI = dyn_cast(AllocaUsers.pop_back_val()); + if (isa(UserI) || isa(UserI)) { + AllocaUsers.append(UserI->user_begin(), UserI->user_end()); + continue; + } + if (UserI == CB) + continue; + auto *II = dyn_cast(UserI); + if (!II || !II->isLifetimeStartOrEnd()) + return false; + } + } + // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { @@ -3746,7 +3785,7 @@ // successor block. if (DestBlock->getUniquePredecessor() != I->getParent()) return false; - for (BasicBlock::iterator Scan = I->getIterator(), + for (BasicBlock::iterator Scan = std::next(I->getIterator()), E = I->getParent()->end(); Scan != E; ++Scan) if (Scan->mayWriteToMemory()) @@ -3922,7 +3961,7 @@ if (OptBB) { auto *UserParent = *OptBB; // Okay, the CFG is simple enough, try to sink this instruction. - if (TryToSinkInstruction(I, UserParent)) { + if (TryToSinkInstruction(I, UserParent, TLI)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); MadeIRChange = true; // We'll add uses of the sunk instruction below, but since Index: llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll =================================================================== --- llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -112,15 +112,38 @@ declare i32 @unknown(i32* %dest) -define i32 @sink_to_use(i1 %c) { -; CHECK-LABEL: @sink_to_use( +define i32 @sink_write_to_use(i1 %c) { +; CHECK-LABEL: @sink_write_to_use( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* writeonly %var) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @sink_readwrite_to_use(i1 %c) { +; CHECK-LABEL: @sink_readwrite_to_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: