Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3686,13 +3686,13 @@
 /// beginning of DestBlock, which can only happen if it's safe to move the
 /// instruction past all of the instructions between it and the end of its
 /// block.
-static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
+                                 TargetLibraryInfo &TLI) {
   assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
   BasicBlock *SrcBlock = I->getParent();
 
-  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
-  if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
-      I->isTerminator())
+  // Cannot move control-flow-involving instructions.
+  if (isa<PHINode>(I) || I->isEHPad() || I->isTerminator())
     return false;
 
   // Do not sink static or dynamic alloca instructions. Static allocas must
@@ -3726,6 +3726,21 @@
     return false;
   }
 
+  // Even if instructions have side-effects, if they would be trivially dead
+  // along all paths, DSE removes that instruction. We can apply the same logic
+  // here to eliminate the instruction along paths it is not used, by sinking
+  // the instruction to the use block (provided we do not sink them past
+  // side-effecting instructions).
+  if (I->mayHaveSideEffects()) {
+    if (!wouldInstructionBeTriviallyDeadOnPathsWithoutUse(I, &TLI))
+      return false;
+    for (BasicBlock::iterator Scan = std::next(I->getIterator()),
+                              E = std::prev(I->getParent()->end());
+         Scan != E; ++Scan) {
+      if (Scan->mayHaveSideEffects())
+        return false;
+    }
+  }
   I->dropDroppableUses([DestBlock](const Use *U) {
     if (auto *I = dyn_cast<Instruction>(U->getUser()))
       return I->getParent() != DestBlock;
@@ -3789,7 +3804,7 @@
   }
 
   return true;
-}
+ }
 
 bool InstCombinerImpl::run() {
   while (!Worklist.isEmpty()) {
@@ -3895,7 +3910,7 @@
       if (OptBB) {
         auto *UserParent = *OptBB;
         // Okay, the CFG is simple enough, try to sink this instruction.
-        if (TryToSinkInstruction(I, UserParent)) {
+        if (TryToSinkInstruction(I, UserParent, TLI)) {
           LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
           MadeIRChange = true;
           // We'll add uses of the sunk instruction below, but since
Index: llvm/test/Transforms/InstCombine/sink_instruction.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sink_instruction.ll
+++ llvm/test/Transforms/InstCombine/sink_instruction.ll
@@ -41,7 +41,7 @@
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X_ADDR_17]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[TMP1]], [[X_ADDR_17]]
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @bar() #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @bar()
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[X_ADDR_0]] = phi i32 [ [[TMP2]], [[BB1]] ], [ [[X_ADDR_17]], [[BB]] ]
@@ -216,3 +216,171 @@
   ret i32 %sum.0
 }
 
+declare void @check(i8*)
+declare noalias i8* @malloc(i32) willreturn nounwind
+declare void @check2(i8*, i8*)
+
+declare void @checkd(double)
+declare double @log(double) willreturn nounwind readnone
+define void @test7(i1 %cond, double %d) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[A:%.*]] = call double @log(double [[D:%.*]])
+; CHECK-NEXT:    call void @checkd(double [[A]])
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
+  %A = call double @log(double %d)
+  br i1 %cond, label %if, label %else
+
+if:
+  call void @checkd(double %A)
+  ret void
+else:
+  ret void
+}
+
+; TODO: We can sink down the allocation to its only use.
+define i8** @test_allocation_sink_bc(i1 %cond) {
+; CHECK-LABEL: @test_allocation_sink_bc(
+; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(16000) i8* @malloc(i32 16000)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[X:%.*]] = bitcast i8* [[A]] to i8**
+; CHECK-NEXT:    ret i8** [[X]]
+; CHECK:       else:
+; CHECK-NEXT:    [[B:%.*]] = call dereferenceable_or_null(24000) i8* @malloc(i32 24000)
+; CHECK-NEXT:    [[Y:%.*]] = bitcast i8* [[B]] to i8**
+; CHECK-NEXT:    ret i8** [[Y]]
+;
+  %A = call i8* @malloc(i32 16000)
+  br i1 %cond, label %if, label %else
+
+if:
+  %X = bitcast i8* %A to i8**
+  ret i8** %X
+
+
+else:
+  %B = call i8* @malloc(i32 24000)
+  %Y = bitcast i8* %B to i8**
+  ret i8** %Y
+}
+
+; TODO: We can sink down the allocation to its only use.
+define void @test_allocation_sink(i1 %cond) {
+; CHECK-LABEL: @test_allocation_sink(
+; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(16000) i8* @malloc(i32 16000)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @check(i8* [[A]])
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
+  %A = call i8* @malloc(i32 16000)
+  br i1 %cond, label %if, label %else
+
+if:
+  call void @check(i8* %A)
+  ret void
+
+else:
+  ret void
+}
+
+; TODO: We can sink this malloc as well down to the use block.
+define void @test_allocation_sink_noalias_load(i1 %cond, i32 %i, i32* readonly %P) {
+; CHECK-LABEL: @test_allocation_sink_noalias_load(
+; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(16000) i8* @malloc(i32 16000)
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @check(i8* [[A]])
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    [[X:%.*]] = call i32 @foo(i32 [[L]], i32 [[L]])
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @foo(i32 [[L]], i32 [[L]])
+; CHECK-NEXT:    ret void
+;
+  %A = call i8* @malloc(i32 16000)
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  %L = load i32, i32* %arrayidx, align 4
+  br i1 %cond, label %if, label %else
+
+if:
+  call void @check(i8* %A)
+  ret void
+
+else:
+  %X = call i32 @foo(i32 %L, i32 %L)
+  %Y = call i32 @foo(i32 %L, i32 %L)
+  ret void
+}
+
+; TODO: We can sink both allocations to its only use without them being
+; incorrectly reordered.
+define void @test_allocation_sink2(i1 %cond) {
+; CHECK-LABEL: @test_allocation_sink2(
+; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(16000) i8* @malloc(i32 16000)
+; CHECK-NEXT:    [[B:%.*]] = call dereferenceable_or_null(16) i8* @malloc(i32 16)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @check2(i8* [[A]], i8* [[B]])
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
+  %A = call i8* @malloc(i32 16000)
+  %B = call i8* @malloc(i32 16)
+  br i1 %cond, label %if, label %else
+
+if:
+  call void @check2(i8* %A, i8* %B)
+  ret void
+
+else:
+  ret void
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+; We cannot sink the stacksave to the only use (stackrestore), since that incorrectly modifies
+; stack state (dynamic alloca in between).
+define void @test_neg_stacksave(i1 %cond, i32 %size) {
+; CHECK-LABEL: @test_neg_stacksave(
+; CHECK-NEXT:    [[TMP:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[SIZE:%.*]] to i64
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, i64 [[TMP1]], align 1
+; CHECK-NEXT:    call void @check(i8* nonnull [[A]])
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[TMP]])
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[X:%.*]] = tail call i32 @bar()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
+  %tmp = call i8* @llvm.stacksave( )
+  %A = alloca i8, i32 %size
+  call void @check(i8* %A)
+  br label %next
+
+next:
+  call void @llvm.stackrestore( i8* %tmp )
+  br i1 %cond, label %if, label %else
+
+if:
+  %X = tail call i32 @bar() nounwind
+  ret void
+
+else:
+  ret void
+}