diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3302,6 +3302,11 @@ // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { + // We don't want to do any sophisticated alias analysis, so we only check + // the instructions after I in I's parent block if we try to sink to its + // successor block. + if (DestBlock->getUniquePredecessor() != I->getParent()) + return false; for (BasicBlock::iterator Scan = I->getIterator(), E = I->getParent()->end(); Scan != E; ++Scan) @@ -3419,7 +3424,8 @@ } } - // See if we can trivially sink this instruction to a successor basic block. + // See if we can trivially sink this instruction to its user if we can + // prove that the successor is not executed more frequently than our block. if (EnableCodeSinking) if (Use *SingleUse = I->getSingleUndroppableUse()) { BasicBlock *BB = I->getParent(); @@ -3435,7 +3441,20 @@ if (UserParent != BB) { // See if the user is one of our successors that has only one // predecessor, so that we don't have to split the critical edge. - if (UserParent->getUniquePredecessor() == BB) { + bool ShouldSink = UserParent->getUniquePredecessor() == BB; + // Another option where we can sink is a block that ends with a + // terminator that does not pass control to other block (such as + // return or unreachable). In this case: + // - I dominates the User (by SSA form); + // - the User will be executed at most once. + // So sinking I down to User is always profitable or neutral. 
+ if (!ShouldSink) { + auto *Term = UserParent->getTerminator(); + ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term); + } + if (ShouldSink) { + assert(DT.dominates(BB, UserParent) && + "Dominance relation broken?"); // Okay, the CFG is simple enough, try to sink this instruction. if (TryToSinkInstruction(I, UserParent)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -203,7 +203,6 @@ ; CHECK-LABEL: @pr26354( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[TMP:%.*]], align 16 -; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0 ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: ; CHECK-NEXT: [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1 @@ -211,6 +210,7 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[PH:%.*]] = phi <4 x double> [ undef, [[ENTRY:%.*]] ], [ [[I1]], [[IF]] ] +; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0 ; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1 ; CHECK-NEXT: [[MU:%.*]] = fmul double [[E1]], [[E3]] ; CHECK-NEXT: ret double [[MU]] diff --git a/llvm/test/Transforms/InstCombine/overflow.ll b/llvm/test/Transforms/InstCombine/overflow.ll --- a/llvm/test/Transforms/InstCombine/overflow.ll +++ b/llvm/test/Transforms/InstCombine/overflow.ll @@ -8,13 +8,13 @@ ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SADD:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]]) -; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: tail call 
void @throwAnExceptionOrWhatever() #2 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: +; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0 ; CHECK-NEXT: ret i32 [[SADD_RESULT]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll --- a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll +++ b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll @@ -3,6 +3,7 @@ ; RUN: opt -passes=instcombine -S < %s | FileCheck %s declare void @use(i32 %x) +declare i1 @cond() define void @test_01(i32 %x, i32 %y) { ; CHECK-LABEL: @test_01( @@ -33,20 +34,18 @@ } -; TODO: %comparator and %signed can be sunk down to unreachable just as in -; test above. define void @test_02(i32 %x, i32 %y) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X]], [[Y]] -; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1 -; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]] +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]] ; CHECK: medium: ; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]] ; CHECK-NEXT: br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]] ; CHECK: unreached: +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1 +; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]] ; CHECK-NEXT: call void @use(i32 [[COMPARATOR]]) ; CHECK-NEXT: unreachable ; CHECK: exit: @@ -70,3 +69,92 @@ exit: ret void } + +define i32 @test_03(i32 %x, i32 %y) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[C2]], label [[EXIT:%.*]], label [[MEDIUM:%.*]] +; CHECK: medium: +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]] +; 
CHECK-NEXT: br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]] +; CHECK: unreached: +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1 +; CHECK-NEXT: [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]] +; CHECK-NEXT: ret i32 [[COMPARATOR]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %c1 = icmp eq i32 %x, %y + %c2 = icmp slt i32 %x, %y + %signed = select i1 %c2, i32 -1, i32 1 + %comparator = select i1 %c1, i32 0, i32 %signed + br i1 %c2, label %exit, label %medium + +medium: + %c3 = icmp sgt i32 %x, %y + br i1 %c3, label %exit, label %unreached + +unreached: + ret i32 %comparator + +exit: + ret i32 0 +} + +define i32 @test_04(i32 %x, i1 %c) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: bb0: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB2]] ] +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = add i32 [[P]], [[A]] +; CHECK-NEXT: ret i32 [[R]] +; +bb0: + %a = add i32 %x, 1 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + %p = phi i32 [0, %bb1], [1, %bb2] + %r = add i32 %p, %a + ret i32 %r +} + +; Do not sink into a potentially hotter block. 
+define i32 @test_05_neg(i32 %x, i1 %cond) { +; CHECK-LABEL: @test_05_neg( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[CALL:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[CALL]], label [[BB2]], label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[A]], [[BB2]] ] +; CHECK-NEXT: ret i32 [[P]] +; +bb0: + %a = add i32 %x, 1 + br i1 %cond, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + %call = call i1 @cond() + br i1 %call, label %bb2, label %bb3 +bb3: + %p = phi i32 [0, %bb1], [%a, %bb2] + ret i32 %p +} diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -796,10 +796,6 @@ ; CHECK-LABEL: @test_chr_7_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 -; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 -; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 -; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12 @@ -824,6 +820,10 @@ ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: +; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 ; CHECK-NEXT: ret i32 [[SUM2]] ; entry: @@ -1381,8 +1381,6 @@ ; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]] ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 ; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 -; 
CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]] -; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4 ; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0 @@ -1391,6 +1389,8 @@ ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: +; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]] +; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16 ; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]] ; CHECK-NEXT: ret i32 [[V11]] ; diff --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll --- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -273,7 +273,6 @@ ; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B]], 0 -; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3 ; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[A]], [[B]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP3:%.*]], label [[TMP2:%.*]] @@ -282,6 +281,7 @@ ; CHECK-NEXT: store i32 [[SIMPLIFYCFG_MERGE]], i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[TMP3]] ; CHECK: 3: +; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z2]], i32 3 ; CHECK-NEXT: ret i32 [[Z4]] ; entry: