Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3727,12 +3727,13 @@
 /// beginning of DestBlock, which can only happen if it's safe to move the
 /// instruction past all of the instructions between it and the end of its
 /// block.
-static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
+                                 TargetLibraryInfo &TLI) {
   assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
   BasicBlock *SrcBlock = I->getParent();
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
-  if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
+  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
       I->isTerminator())
     return false;
 
@@ -3752,6 +3753,51 @@
     if (CI->isConvergent())
       return false;
   }
+
+  // Unless we can prove that the memory write isn't visible except on the
+  // path we're sinking to, we must bail.
+  if (I->mayWriteToMemory()) {
+    // Check for case where the call writes to an otherwise dead alloca. This
+    // shows up for unused out-params in idiomatic C/C++ code.
+    auto *CB = dyn_cast<CallBase>(I);
+    if (!CB)
+      // TODO: handle e.g. store to alloca here - only worth doing if we extend
+      // to allow reload along used path as described below. Otherwise, this
+      // is simply a store to a dead allocation which will be removed.
+      return false;
+    Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
+    if (!Dest)
+      return false;
+    auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
+    if (!AI)
+      // TODO: allow malloc?
+      return false;
+    // TODO: allow memory access dominated by move point? Note that since AI
+    // could have a reference to itself captured by the call, we would need to
+    // account for cycles in doing so.
+    SmallVector<const User *> AllocaUsers;
+    SmallPtrSet<const User *, 4> Visited;
+    auto pushUsers = [&](const Instruction &I) {
+      for (const User *U : I.users()) {
+        if (Visited.insert(U).second)
+          AllocaUsers.push_back(U);
+      }
+    };
+    pushUsers(*AI);
+    while (!AllocaUsers.empty()) {
+      auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
+      if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
+          isa<AddrSpaceCastInst>(UserI)) {
+        pushUsers(*UserI);
+        continue;
+      }
+      if (UserI == CB)
+        continue;
+      // TODO: support lifetime.start/end here
+      return false;
+    }
+  }
+
   // We can only sink load instructions if there is nothing between the load and
   // the end of block that could change the value.
   if (I->mayReadFromMemory()) {
@@ -3760,7 +3806,7 @@
     // successor block.
     if (DestBlock->getUniquePredecessor() != I->getParent())
       return false;
-    for (BasicBlock::iterator Scan = I->getIterator(),
+    for (BasicBlock::iterator Scan = std::next(I->getIterator()),
                               E = I->getParent()->end();
          Scan != E; ++Scan)
       if (Scan->mayWriteToMemory())
@@ -3936,7 +3982,7 @@
       if (OptBB) {
         auto *UserParent = *OptBB;
         // Okay, the CFG is simple enough, try to sink this instruction.
-        if (TryToSinkInstruction(I, UserParent)) {
+        if (TryToSinkInstruction(I, UserParent, TLI)) {
           LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
           MadeIRChange = true;
           // We'll add uses of the sunk instruction below, but since
Index: llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
+++ llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
@@ -111,16 +111,40 @@
 }
 
 declare i32 @unknown(i32* %dest)
+declare i32 @unknown.as2(i32 addrspace(2)* %dest)
 
-define i32 @sink_to_use(i1 %c) {
-; CHECK-LABEL: @sink_to_use(
+define i32 @sink_write_to_use(i1 %c) {
+; CHECK-LABEL: @sink_write_to_use(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VAR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1:[0-9]+]]
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]]
 ; CHECK:       early_return:
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       use_block:
+; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    ret i32 [[VAR3]]
+;
+entry:
+  %var = alloca i32, align 4
+  %var3 = call i32 @unknown(i32* writeonly %var) argmemonly nounwind willreturn
+  br i1 %c, label %early_return, label %use_block
+
+early_return:
+  ret i32 0
+
+use_block:
+  ret i32 %var3
+}
+
+define i32 @sink_readwrite_to_use(i1 %c) {
+; CHECK-LABEL: @sink_readwrite_to_use(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]]
+; CHECK:       early_return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       use_block:
+; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]]
 ; CHECK-NEXT:    ret i32 [[VAR3]]
 ;
 entry:
@@ -135,6 +159,108 @@
   ret i32 %var3
 }
 
+define i32 @sink_bitcast(i1 %c) {
+; CHECK-LABEL: @sink_bitcast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAR:%.*]] = alloca i8, align 8
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]]
+; CHECK:       early_return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       use_block:
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i8* [[VAR]] to i32*
+; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[BITCAST]]) #[[ATTR1]]
+; CHECK-NEXT:    ret i32 [[VAR3]]
+;
+entry:
+  %var = alloca i8, align 8
+  %bitcast = bitcast i8* %var to i32*
+  %var3 = call i32 @unknown(i32* %bitcast) argmemonly nounwind willreturn
+  br i1 %c, label %early_return, label %use_block
+
+early_return:
+  ret i32 0
+
+use_block:
+  ret i32 %var3
+}
+
+
+define i32 @sink_gep1(i1 %c) {
+; CHECK-LABEL: @sink_gep1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAR1:%.*]] = alloca [2 x i32], align 8
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]]
+; CHECK:       early_return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       use_block:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 1
+; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[GEP]]) #[[ATTR1]]
+; CHECK-NEXT:    ret i32 [[VAR3]]
+;
+entry:
+  %var = alloca i64, align 8
+  %bitcast = bitcast i64* %var to i32*
+  %gep = getelementptr i32, i32* %bitcast, i32 1
+  %var3 = call i32 @unknown(i32* %gep) argmemonly nounwind willreturn
+  br i1 %c, label %early_return, label %use_block
+
+early_return:
+  ret i32 0
+
+use_block:
+  ret i32 %var3
+}
+
+define i32 @sink_gep2(i1 %c) {
+; CHECK-LABEL: @sink_gep2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAR1:%.*]] = alloca [2 x i32], align 8
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]]
+; CHECK:       early_return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       use_block:
+; CHECK-NEXT:    [[VAR1_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 0
+; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR1_SUB]]) #[[ATTR1]]
+; CHECK-NEXT:    ret i32 [[VAR3]]
+;
+entry:
+  %var = alloca i64, align 8
+  %bitcast = bitcast i64* %var to i32*
+  %var3 = call i32 @unknown(i32* %bitcast) argmemonly nounwind willreturn
+  br i1 %c, label %early_return, label %use_block
+
+early_return:
+  ret i32 0
+
+use_block:
+  ret i32 %var3
+}
+
+define i32 @sink_addrspacecast(i1 %c) {
+; CHECK-LABEL: @sink_addrspacecast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAR:%.*]] = alloca i32, align 8
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]]
+; CHECK:       early_return:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       use_block:
+; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i32* [[VAR]] to i32 addrspace(2)*
+; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown.as2(i32 addrspace(2)* [[CAST]]) #[[ATTR1]]
+; CHECK-NEXT:    ret i32 [[VAR3]]
+;
+entry:
+  %var = alloca i32, align 8
+  %cast = addrspacecast i32* %var to i32 addrspace(2)*
+  %var3 = call i32 @unknown.as2(i32 addrspace(2)* %cast) argmemonly nounwind willreturn
+  br i1 %c, label %early_return, label %use_block
+
+early_return:
+  ret i32 0
+
+use_block:
+  ret i32 %var3
+}
+
 define i32 @neg_infinite_loop(i1 %c) {
 ; CHECK-LABEL: @neg_infinite_loop(
 ; CHECK-NEXT:  entry: