Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3419,7 +3419,8 @@ } } - // See if we can trivially sink this instruction to a successor basic block. + // See if we can trivially sink this instruction to its user if we can + // prove that the successor is not executed more frequently than our block. if (EnableCodeSinking) if (Use *SingleUse = I->getSingleUndroppableUse()) { BasicBlock *BB = I->getParent(); @@ -3435,7 +3436,22 @@ if (UserParent != BB) { // See if the user is one of our successors that has only one // predecessor, so that we don't have to split the critical edge. - if (UserParent->getUniquePredecessor() == BB) { + bool ShouldSink = UserParent->getUniquePredecessor() == BB; + // Another option where we can sink is a block that ends with a + // terminator that does not pass control to another block (such as + // return or unreachable). In this case: + // - I dominates the User (by SSA form); + // - the User will be executed at most once. + // So sinking I down to User is always profitable or neutral. + // Only do it if I may not read or write memory to avoid dealing + // with alias analysis. + if (!ShouldSink && !I->mayReadOrWriteMemory()) { + auto *Term = UserParent->getTerminator(); + ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term); + } + if (ShouldSink) { + assert(DT.dominates(BB, UserParent) && + "Dominance relation broken?"); // Okay, the CFG is simple enough, try to sink this instruction. 
if (TryToSinkInstruction(I, UserParent)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); Index: llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll =================================================================== --- llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -203,7 +203,6 @@ ; CHECK-LABEL: @pr26354( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[TMP:%.*]], align 16 -; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0 ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1 @@ -211,6 +210,7 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[PH:%.*]] = phi <4 x double> [ undef, [[ENTRY:%.*]] ], [ [[I1]], [[IF]] ] +; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0 ; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1 ; CHECK-NEXT: [[MU:%.*]] = fmul double [[E1]], [[E3]] ; CHECK-NEXT: ret double [[MU]] Index: llvm/test/Transforms/InstCombine/overflow.ll =================================================================== --- llvm/test/Transforms/InstCombine/overflow.ll +++ llvm/test/Transforms/InstCombine/overflow.ll @@ -8,13 +8,13 @@ ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SADD:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]]) -; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: +; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0 ; CHECK-NEXT: ret i32 [[SADD_RESULT]] ; entry: Index: 
llvm/test/Transforms/InstCombine/sink_to_unreachable.ll =================================================================== --- llvm/test/Transforms/InstCombine/sink_to_unreachable.ll +++ llvm/test/Transforms/InstCombine/sink_to_unreachable.ll @@ -33,20 +33,18 @@ } -; TODO: %comparator and %signed can be sunk down to unreachable just as in -; test above. define void @test_02(i32 %x, i32 %y) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X]], [[Y]] -; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1 -; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]] +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]] ; CHECK: medium: ; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]] ; CHECK-NEXT: br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]] ; CHECK: unreached: +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1 +; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]] ; CHECK-NEXT: call void @use(i32 [[COMPARATOR]]) ; CHECK-NEXT: unreachable ; CHECK: exit: @@ -70,3 +68,37 @@ exit: ret void } + +define i32 @test_03(i32 %x, i32 %y) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]] +; CHECK: medium: +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]] +; CHECK: unreached: +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1 +; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]] +; CHECK-NEXT: ret i32 [[COMPARATOR]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %c1 = icmp eq i32 %x, %y + 
%c2 = icmp slt i32 %x, %y + %signed = select i1 %c2, i32 -1, i32 1 + %comparator = select i1 %c1, i32 0, i32 %signed + br i1 %c2, label %exit, label %medium + +medium: + %c3 = icmp sgt i32 %x, %y + br i1 %c3, label %exit, label %unreached + +unreached: + ret i32 %comparator + +exit: + ret i32 0 +} Index: llvm/test/Transforms/PGOProfile/chr.ll =================================================================== --- llvm/test/Transforms/PGOProfile/chr.ll +++ llvm/test/Transforms/PGOProfile/chr.ll @@ -796,10 +796,6 @@ ; CHECK-LABEL: @test_chr_7_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 -; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 -; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 -; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12 @@ -824,6 +820,10 @@ ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: +; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 ; CHECK-NEXT: ret i32 [[SUM2]] ; entry: @@ -1381,8 +1381,6 @@ ; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]] ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 ; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 -; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]] -; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4 ; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0 @@ -1391,6 +1389,8 @@ ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: +; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], 
[[SUM2]] +; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16 ; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]] ; CHECK-NEXT: ret i32 [[V11]] ; @@ -2004,6 +2004,27 @@ ; Test a case with a really long use-def chains. This test checks that it's not ; really slow and doesn't appear to be hanging. define i64 @test_chr_22(i1 %i, i64* %j, i64 %v0) !prof !14 { +; CHECK-LABEL: @test_chr_22( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[V1:%.*]] = add i64 [[V0:%.*]], 3 +; CHECK-NEXT: [[V2:%.*]] = add i64 [[V1]], [[V0]] +; CHECK-NEXT: [[C1:%.*]] = icmp slt i64 [[V2]], 100 +; CHECK-NEXT: [[V300:%.*]] = mul i64 [[V2]], -8647960034816487527 +; CHECK-NEXT: [[V301:%.*]] = icmp ne i64 [[V300]], 100 +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C1]], [[V301]] +; CHECK-NEXT: br i1 [[TMP0]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0.split: +; CHECK-NEXT: [[V299:%.*]] = mul i64 [[V2]], 7860086430977039991 +; CHECK-NEXT: store i64 [[V299]], i64* [[J:%.*]], align 4 +; CHECK-NEXT: ret i64 99 +; CHECK: bb0.split.nonchr: +; CHECK-NEXT: [[V300_NONCHR:%.*]] = mul i64 [[V2]], -8647960034816487527 +; CHECK-NEXT: [[V301_NONCHR:%.*]] = icmp eq i64 [[V300_NONCHR]], 100 +; CHECK-NEXT: [[V302_NONCHR_V:%.*]] = select i1 [[V301_NONCHR]], i64 1938697607916024098, i64 7860086430977039991, !prof !16 +; CHECK-NEXT: [[V302_NONCHR:%.*]] = mul i64 [[V2]], [[V302_NONCHR_V]] +; CHECK-NEXT: store i64 [[V302_NONCHR]], i64* [[J]], align 4 +; CHECK-NEXT: ret i64 99 +; bb0: %v1 = add i64 %v0, 3 %v2 = add i64 %v1, %v0 @@ -2317,6 +2338,12 @@ ; test_chr_22 in that it has nested control structures (multiple scopes) and ; covers additional code. 
define i64 @test_chr_23(i64 %v0) !prof !14 { +; CHECK-LABEL: @test_chr_23( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[V0:%.*]], 50 +; CHECK-NEXT: [[V10:%.*]] = icmp ne i64 [[TMP0]], -50 +; CHECK-NEXT: ret i64 99 +; entry: %v1 = add i64 %v0, 3 %v2 = add i64 %v1, %v1 @@ -2465,6 +2492,25 @@ ; Test to not crash upon a 0:0 branch_weight metadata. define void @test_chr_24(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_24( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21 +; CHECK: bb2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; entry: %0 = load i32, i32* %i %1 = and i32 %0, 1 Index: llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -273,7 +273,6 @@ ; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B]], 0 -; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP3:%.*]], label [[TMP2:%.*]] @@ -282,6 +281,7 @@ ; CHECK-NEXT: store i32 [[SIMPLIFYCFG_MERGE]], i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[TMP3]] ; CHECK: 3: +; CHECK-NEXT: [[Z4:%.*]] = select i1 
[[X2]], i32 [[Z2]], i32 3 ; CHECK-NEXT: ret i32 [[Z4]] ; entry: