Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -1508,20 +1508,62 @@ assert(BI1->isUnconditional()); BasicBlock *BBEnd = BI1->getSuccessor(0); - // We currently only support branch targets with two predecessors. - // FIXME: this is an arbitrary restriction and should be lifted. - SmallVector Blocks; - for (auto *BB : predecessors(BBEnd)) - Blocks.push_back(BB); - if (Blocks.size() != 2 || - !all_of(Blocks, [](const BasicBlock *BB) { - auto *BI = dyn_cast(BB->getTerminator()); - return BI && BI->isUnconditional(); - })) + // We support two situations: + // (1) all incoming arcs are unconditional + // (2) one incoming arc is conditional + // + // (2) is very common in switch defaults and + // else-if patterns; + // + // if (a) f(1); + // else if (b) f(2); + // + // produces: + // + // [if] + // / \ + // [f(1)] [if] + // | | \ + // | | \ + // | [f(2)]| + // \ | / + // [ end ] + // + // [end] has two unconditional predecessor arcs and one conditional. The + // conditional refers to the implicit empty 'else' arc. This conditional + // arc can also be caused by an empty default block in a switch. + // + // In this case, we attempt to sink code from all *unconditional* arcs. + // If we can sink instructions from these arcs (determined during the scan + // phase below) we insert a common successor for all unconditional arcs and + // connect that to [end], to enable sinking: + // + // [if] + // / \ + // [x(1)] [if] + // | | \ + // | | \ + // | [x(2)] | + // \ / | + // [sink.split] | + // \ / + // [ end ] + // + SmallVector UnconditionalPreds; + Instruction *Cond = nullptr; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa(T) && cast(T)->isUnconditional()) + UnconditionalPreds.push_back(B); + else if ((isa(T) || isa(T)) && !Cond) + Cond = T; + else + return false; + } + if (UnconditionalPreds.size() < 2) return false; - + bool Changed = false; - // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each // block can be sunk, those instructions are added to ValuesToSink and we @@ -1531,7 +1573,7 @@ unsigned ScanIdx = 0; SmallPtrSet InstructionsToSink; DenseMap> PHIOperands; - LockstepReverseIterator LRI(Blocks); + LockstepReverseIterator LRI(UnconditionalPreds); while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) { DEBUG(dbgs() << "SINK: instruction can be sunk: " << (*LRI)[0] << "\n"); @@ -1540,6 +1582,17 @@ --LRI; } + if (ScanIdx > 0 && Cond) { + DEBUG(dbgs() << "SINK: Splitting edge\n"); + // We have a conditional edge and we're going to sink some instructions. + // Insert a new block postdominating all blocks we're going to sink from. + if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds, + ".sink.split")) + // Edges couldn't be split. + return false; + Changed = true; + } + // Now that we've analyzed all potential sinking candidates, perform the // actual sink. We iteratively sink the last non-terminator of the source // blocks into their common successor unless doing so would require too @@ -1554,7 +1607,7 @@ // This is unlikely in practice though. for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { DEBUG(dbgs() << "SINK: Sink: " - << *Blocks[0]->getTerminator()->getPrevNode() + << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); // Because we've sunk every instruction in turn, the current instruction to // sink is always at index 0. @@ -1565,18 +1618,19 @@ if (InstructionsToSink.count(V) == 0) ++NumPHIdValues; DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); - assert((NumPHIdValues % Blocks.size() == 0) && + assert((NumPHIdValues % UnconditionalPreds.size() == 0) && "Every operand must either be PHId or not PHId!"); - if (NumPHIdValues / Blocks.size() > 1) + if (NumPHIdValues / UnconditionalPreds.size() > 1) { // Too many PHIs would be created. + DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); break; + } - sinkLastInstruction(Blocks); + sinkLastInstruction(UnconditionalPreds); NumSinkCommons++; Changed = true; } - return Changed; } Index: test/CodeGen/AArch64/arm64-jumptable.ll =================================================================== --- test/CodeGen/AArch64/arm64-jumptable.ll +++ test/CodeGen/AArch64/arm64-jumptable.ll @@ -2,25 +2,26 @@ ; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX ; -define void @sum(i32* %to) { +define void @sum(i32 %a, i32* %to, i32 %c) { entry: - switch i32 undef, label %exit [ + switch i32 %a, label %exit [ i32 1, label %bb1 i32 2, label %bb2 i32 3, label %bb3 i32 4, label %bb4 ] bb1: - store i32 undef, i32* %to + %b = add i32 %c, 1 + store i32 %b, i32* %to br label %exit bb2: - store i32 undef, i32* %to + store i32 2, i32* %to br label %exit bb3: - store i32 undef, i32* %to + store i32 3, i32* %to br label %exit bb4: - store i32 undef, i32* %to + store i32 4, i32* %to br label %exit exit: ret void Index: test/CodeGen/AArch64/branch-folder-merge-mmos.ll =================================================================== --- test/CodeGen/AArch64/branch-folder-merge-mmos.ll +++ test/CodeGen/AArch64/branch-folder-merge-mmos.ll @@ -3,7 +3,7 @@ ; Function Attrs: norecurse nounwind define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 { -; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}}) +; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}) entry: %cmp = icmp sgt i32 %a, 0 br i1 %cmp, label %if.then, label %if.end Index: test/CodeGen/AArch64/ifcvt-select.ll =================================================================== --- test/CodeGen/AArch64/ifcvt-select.ll +++ test/CodeGen/AArch64/ifcvt-select.ll @@ -4,7 +4,7 @@ define i32 @foo(i32 %a, i32 %b) { entry: ;CHECK-LABEL: foo: -;CHECK: csinc +;CHECK: cneg ;CHECK-NOT: csel %sub = sub nsw i32 %b, %a %cmp10 = icmp sgt i32 %a, 0 Index: test/CodeGen/AArch64/rm_redundant_cmp.ll =================================================================== --- test/CodeGen/AArch64/rm_redundant_cmp.ll +++ test/CodeGen/AArch64/rm_redundant_cmp.ll @@ -11,9 +11,9 @@ define void @test_i16_2cmp_signed_1() { ; CHECK-LABEL: test_i16_2cmp_signed_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.gt +; CHECK-NEXT: b.lt ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2 %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2 @@ -39,7 +39,7 @@ define void @test_i16_2cmp_signed_2() { ; CHECK-LABEL: test_i16_2cmp_signed_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.le +; CHECK-NEXT: b.gt ; CHECK-NOT: cmp ; CHECK: b.ge entry: @@ -67,9 +67,9 @@ define void @test_i16_2cmp_unsigned_1() { ; CHECK-LABEL: test_i16_2cmp_unsigned_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.hi +; CHECK-NEXT: b.lo ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2 %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2 @@ -95,7 +95,7 @@ define void @test_i16_2cmp_unsigned_2() { ; CHECK-LABEL: test_i16_2cmp_unsigned_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.ls +; CHECK-NEXT: b.hi ; CHECK-NOT: cmp ; CHECK: b.hs entry: @@ -132,9 +132,9 @@ define void @test_i8_2cmp_signed_1() { ; CHECK-LABEL: test_i8_2cmp_signed_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.gt +; CHECK-NEXT: b.lt ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2 %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2 @@ -160,7 +160,7 @@ define void @test_i8_2cmp_signed_2() { ; CHECK-LABEL: test_i8_2cmp_signed_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.le +; CHECK-NEXT: b.gt ; CHECK-NOT: cmp ; CHECK: b.ge entry: @@ -188,9 +188,9 @@ define void @test_i8_2cmp_unsigned_1() { ; CHECK-LABEL: test_i8_2cmp_unsigned_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.hi +; CHECK-NEXT: b.lo ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2 %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2 @@ -216,7 +216,7 @@ define void @test_i8_2cmp_unsigned_2() { ; CHECK-LABEL: test_i8_2cmp_unsigned_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.ls +; CHECK-NEXT: b.hi ; CHECK-NOT: cmp ; CHECK: b.hs entry: Index: test/MC/ARM/data-in-code.ll =================================================================== --- test/MC/ARM/data-in-code.ll +++ test/MC/ARM/data-in-code.ll @@ -9,7 +9,7 @@ ;; Ensure that if a jump table is generated that it has Mapping Symbols ;; marking the data-in-code region. -define void @foo(i32* %ptr) nounwind ssp { +define void @foo(i32* %ptr, i32 %b) nounwind ssp { %tmp = load i32, i32* %ptr, align 4 switch i32 %tmp, label %exit [ i32 0, label %bb0 @@ -18,7 +18,7 @@ i32 3, label %bb3 ] bb0: - store i32 0, i32* %ptr, align 4 + store i32 %b, i32* %ptr, align 4 br label %exit bb1: store i32 1, i32* %ptr, align 4 Index: test/Transforms/SimplifyCFG/sink-common-code.ll =================================================================== --- test/Transforms/SimplifyCFG/sink-common-code.ll +++ test/Transforms/SimplifyCFG/sink-common-code.ll @@ -393,3 +393,113 @@ ; CHECK: getelementptr ; CHECK: load ; CHECK-NOT: load + +define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test16 +; CHECK: zext +; CHECK-NOT: zext + +define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %entry ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test17 +; CHECK: if.then: +; CHECK-NEXT: icmp uge +; CHECK-NEXT: br label %[[x:.*]] + +; CHECK: if.then2: +; CHECK-NEXT: add +; CHECK-NEXT: icmp ule +; CHECK-NEXT: br label %[[x]] + +; CHECK: [[x]]: +; CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp +; CHECK-NEXT: %[[z:.*]] = zext i1 %[[y]] +; CHECK-NEXT: br label %if.end + +; CHECK: if.end: +; CHECK-NEXT: phi i8 +; CHECK-DAG: [ %[[z]], %[[x]] ] +; CHECK-DAG: [ 0, %entry ] + +define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.then3 [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.then3: + %add2 = add i32 %nblks, %blksA + %cmp3 = icmp ule i32 %add2, %blksA + %frombool4 = zext i1 %cmp3 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test18 +; CHECK: if.end: +; CHECK-NEXT: %[[x:.*]] = phi i1 +; CHECK-DAG: [ %cmp, %if.then ] +; CHECK-DAG: [ %cmp2, %if.then2 ] +; CHECK-DAG: [ %cmp3, %if.then3 ] +; CHECK-NEXT: zext i1 %[[x]] to i8