Index: lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2044,6 +2044,18 @@ assert(UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!"); BasicBlock *ClonedPH = ClonedPHs.begin()->second; + + // When considering multiple partially-unswitched invariants + // we can't just go replace them with constants in both branches. + // + // For 'AND' we infer that true branch ("continue") means true + // for each invariant operand. + // For 'OR' we can infer that false branch ("continue") means false + // for each invariant operand. + // So it happens that for multiple-partial case we don't replace + // in the unswitched branch. + bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1); + ConstantInt *UnswitchedReplacement = Direction ? ConstantInt::getTrue(BI->getContext()) : ConstantInt::getFalse(BI->getContext()); @@ -2063,7 +2075,8 @@ // unswitched if in the cloned blocks. 
if (DT.dominates(LoopPH, UserI->getParent())) U->set(ContinueReplacement); - else if (DT.dominates(ClonedPH, UserI->getParent())) + else if (ReplaceUnswitched && + DT.dominates(ClonedPH, UserI->getParent())) U->set(UnswitchedReplacement); } } Index: test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll =================================================================== --- test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll +++ test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll @@ -2796,10 +2796,10 @@ ; CHECK: loop_begin.us: ; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1 ; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2 -; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V1_US]], false +; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V1_US]], %cond1 ; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V2_US]], %cond2 ; CHECK-NEXT: %[[AND2_US:.*]] = and i1 %[[AND1_US]], %[[OR1_US]] -; CHECK-NEXT: %[[AND3_US:.*]] = and i1 %[[AND2_US]], false +; CHECK-NEXT: %[[AND3_US:.*]] = and i1 %[[AND2_US]], %cond3 ; CHECK-NEXT: br label %loop_b.us ; ; CHECK: loop_b.us: @@ -2857,12 +2857,99 @@ ; CHECK-NEXT: ret } -; Non-trivial unswitching of a switch. -define i32 @test27(i1* %ptr, i32 %cond) { +; Non-trivial partial loop unswitching of multiple invariant inputs to an `or` +; chain. Basically an inverted version of corresponding `and` test (test26). +define i32 @test27(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) { ; CHECK-LABEL: @test27( entry: br label %loop_begin ; CHECK-NEXT: entry: +; CHECK-NEXT: %[[INV_OR:.*]] = or i1 %cond3, %cond1 +; CHECK-NEXT: br i1 %[[INV_OR]], label %entry.split.us, label %entry.split + +loop_begin: + %v1 = load i1, i1* %ptr1 + %v2 = load i1, i1* %ptr2 + %cond_or1 = or i1 %v1, %cond1 + %cond_and1 = and i1 %v2, %cond2 + %cond_or2 = or i1 %cond_or1, %cond_and1 + %cond_or3 = or i1 %cond_or2, %cond3 + br i1 %cond_or3, label %loop_b, label %loop_a +; The 'loop_b' unswitched loop. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1 +; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2 +; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V1_US]], %cond1 +; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V2_US]], %cond2 +; CHECK-NEXT: %[[OR2_US:.*]] = or i1 %[[OR1_US]], %[[AND1_US]] +; CHECK-NEXT: %[[OR3_US:.*]] = or i1 %[[OR2_US]], %cond3 +; CHECK-NEXT: br label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: call i32 @b() +; CHECK-NEXT: br label %latch.us +; +; CHECK: latch.us: +; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3 +; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +; The original loop. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1 +; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2 +; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V1]], false +; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V2]], %cond2 +; CHECK-NEXT: %[[OR2:.*]] = or i1 %[[OR1]], %[[AND1]] +; CHECK-NEXT: %[[OR3:.*]] = or i1 %[[OR2]], false +; CHECK-NEXT: br i1 %[[OR3]], label %loop_b, label %loop_a + +loop_a: + call i32 @a() + br label %latch +; CHECK: loop_a: +; CHECK-NEXT: call i32 @a() +; CHECK-NEXT: br label %latch + +loop_b: + call i32 @b() + br label %latch +; CHECK: loop_b: +; CHECK-NEXT: call i32 @b() +; CHECK-NEXT: br label %latch + +latch: + %v3 = load i1, i1* %ptr3 + br i1 %v3, label %loop_begin, label %loop_exit +; CHECK: latch: +; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3 +; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split + +loop_exit: + ret i32 0 +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: ret +} + +; Non-trivial unswitching of a switch. 
+define i32 @test28(i1* %ptr, i32 %cond) { +; CHECK-LABEL: @test28( +entry: + br label %loop_begin +; CHECK-NEXT: entry: ; CHECK-NEXT: switch i32 %cond, label %[[ENTRY_SPLIT_LATCH:.*]] [ ; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] ; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]] @@ -2970,8 +3057,8 @@ ; can introduce multiple edges to successors. These need lots of special case ; handling as they get collapsed in many cases (domtree, the unswitch itself) ; but not in all cases (the PHI node operands). -define i32 @test28(i32 %arg) { -; CHECK-LABEL: @test28( +define i32 @test29(i32 %arg) { +; CHECK-LABEL: @test29( entry: br label %header ; CHECK-NEXT: entry: @@ -3149,12 +3236,12 @@ ; CHECK-NEXT: ret i32 %[[EXIT_PHI2]] } -; Similar to @test28 but designed to have one of the duplicate edges be +; Similar to @test29 but designed to have one of the duplicate edges be ; a loop exit edge as those can in some cases be special. Among other things, ; this includes an LCSSA phi with multiple entries despite being a dedicated ; exit block. -define i32 @test29(i32 %arg) { -; CHECK-LABEL: define i32 @test29( +define i32 @test30(i32 %arg) { +; CHECK-LABEL: define i32 @test30( entry: br label %header ; CHECK-NEXT: entry: @@ -3946,8 +4033,8 @@ ; viable for unswitching the inner-most loop. This lets us check that the ; unswitching doesn't end up cycling infinitely even when the cycle is ; indirect and due to revisiting a loop after cloning. 
-define void @test30(i32 %arg) { -; CHECK-LABEL: define void @test30( +define void @test31(i32 %arg) { +; CHECK-LABEL: define void @test31( entry: br label %outer.header ; CHECK-NEXT: entry: Index: test/Transforms/SimpleLoopUnswitch/partial-unswitch-PR39568.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/partial-unswitch-PR39568.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -enable-nontrivial-unswitch -simple-loop-unswitch -S | FileCheck %s +; +; Test for the PR39568 bug - simple-loop-unswitch erroneously inferring +; constant values during partial loop unswitch. +; +; CHECK-LABEL: define i32 @partialOR +define i32 @partialOR(i8 %pre28, i8 %pr) { +entry: + %entry_check = icmp slt i8 %pre28, 4 + %pr_zero = icmp ne i8 %pr, 0 + br label %por.loop +por.loop: + %idx = phi i32 [ 1, %entry ], [ %idxplus, %por.loopexit ] + %idxcheck = icmp ne i32 %idx, 30 + %brmerge = or i1 %entry_check, %pr_zero + %brmerge2 = or i1 %brmerge, %idxcheck + br i1 %brmerge2, label %por.loopexit, label %deopt + +por.loopexit: +; By "partially" unswitching on (entry_check OR pr_zero) to be true +; we can't enforce BOTH to be true. 
+; Verifying that select is either on false (in 'false' branch) or on +; non-constant entry_check: +; +; CHECK-NOT: select i1 true, i32 {{9*}}, i32 0 +; CHECK-DAG: select i1 %entry_check{{.*}}, i32 9, i32 0 +; CHECK-DAG: select i1 %pr_zero{{.*}}, i32 99, i32 0 +; CHECK-DAG: select i1 %idxcheck{{.*}}, i32 999, i32 0 +; CHECK-DAG: select i1 false, i32 9, i32 0 +; CHECK-DAG: select i1 false, i32 99, i32 0 +; CHECK-DAG: select i1 %idxcheck{{.*}}, i32 999, i32 0 + %mux = select i1 %entry_check, i32 9, i32 0 + %mux2 = select i1 %pr_zero, i32 99, i32 0 + %mux3 = select i1 %idxcheck, i32 999, i32 0 + %idxplus = add nuw nsw i32 %idx, 1 + %check = icmp ugt i32 %idx, 241 + br i1 %check, label %return, label %por.loop + +return: + ret i32 %mux +deopt: + unreachable +} + +; CHECK-LABEL: define i32 @partialAND +define i32 @partialAND(i8 %pre28, i8 %pr) { +entry: + %entry_check = icmp slt i8 %pre28, 4 + %pr_zero = icmp ne i8 %pr, 0 + br label %pand.loop + +pand.loop: + %idx = phi i32 [ 1, %entry ], [ %idxplus, %pand.loopexit ] + %idxcheck = icmp ne i32 %idx, 30 + %brmerge = and i1 %entry_check, %pr_zero + %brmerge2 = and i1 %brmerge, %idxcheck + br i1 %brmerge2, label %deopt, label %pand.loopexit + +pand.loopexit: +; By "partially" unswitching on (entry_check AND pr_zero) to be false +; we can't enforce BOTH conditions to be false. +; Verifying that select is either on true (in 'true' branch) or on +; non-constant entry_check. 
+; +; CHECK-DAG: select i1 %entry_check{{.*}}, i32 0, i32 9 +; CHECK-DAG: select i1 %pr_zero{{.*}}, i32 0, i32 99 +; CHECK-DAG: select i1 %idxcheck{{.*}}, i32 0, i32 999 +; CHECK-DAG: select i1 true, i32 0, i32 9 +; CHECK-DAG: select i1 true, i32 0, i32 99 +; CHECK-DAG: select i1 %idxcheck{{.*}}, i32 0, i32 999 +; CHECK-NOT: select i1 false, i32 0, i32 {{9*}} + %mux = select i1 %entry_check, i32 0, i32 9 + %mux2 = select i1 %pr_zero, i32 0, i32 99 + %mux3 = select i1 %idxcheck, i32 0, i32 999 + %idxplus = add nuw nsw i32 %idx, 1 + %check = icmp ugt i32 %idx, 241 + br i1 %check, label %return, label %pand.loop + +return: + ret i32 %mux +deopt: + unreachable +} Index: test/Transforms/SimpleLoopUnswitch/trivial-unswitch-PR39568.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/trivial-unswitch-PR39568.ll @@ -0,0 +1,87 @@ +; RUN: opt < %s -enable-nontrivial-unswitch=false -simple-loop-unswitch -S | FileCheck %s +; +; Test for the PR39568 bug - simple-loop-unswitch erroneously inferring +; constant values during partial loop unswitch. Checking the same patterns +; for trivial unswitch. +; + +; CHECK-LABEL: define i32 @trivialPartialOR +define i32 @trivialPartialOR(i8 %pre28, i8 %pr) { +entry: + %entry_check = icmp slt i8 %pre28, 4 + %pr_zero = icmp ne i8 %pr, 0 + br label %por.loop +por.loop: + %idx = phi i32 [ 1, %entry ], [ %idxplus, %por.loopexit ] + %idxcheck = icmp ne i32 %idx, 30 + %brmerge = or i1 %entry_check, %pr_zero + %brmerge2 = or i1 %brmerge, %idxcheck + br i1 %brmerge2, label %return2, label %por.loopexit + +por.loopexit: +; By "partially" unswitching on (entry_check OR pr_zero) to be true +; we can't enforce BOTH to be true. 
+; Verifying that select is either on false (in 'false' branch) or on +; non-constant entry_check: +; +; CHECK-DAG: select i1 %entry_check{{.*}}, i32 9, i32 0 +; CHECK-DAG: select i1 %pr_zero{{.*}}, i32 99, i32 0 +; CHECK-DAG: select i1 %idxcheck{{.*}}, i32 999, i32 0 +; CHECK-DAG: select i1 false, i32 9, i32 0 +; CHECK-DAG: select i1 false, i32 99, i32 0 +; CHECK-NOT: select i1 true, i32 {{9*}}, i32 0 + %mux = select i1 %entry_check, i32 9, i32 0 + %mux2 = select i1 %pr_zero, i32 99, i32 0 + %mux3 = select i1 %idxcheck, i32 999, i32 0 + %idxplus = add nuw nsw i32 %idx, 1 + %check = icmp ugt i32 %idx, 241 + br i1 %check, label %return, label %por.loop + +return: + ret i32 %mux +return2: + %emux = select i1 %entry_check, i32 9, i32 0 + %emux2 = select i1 %pr_zero, i32 99, i32 0 + ret i32 %emux +} + +; CHECK-LABEL: define i32 @trivialPartialAND +define i32 @trivialPartialAND(i8 %pre28, i8 %pr) { +entry: + %entry_check = icmp slt i8 %pre28, 4 + %pr_zero = icmp ne i8 %pr, 0 + br label %loop +loop: + %idx = phi i32 [ 1, %entry ], [ %idxplus, %loopexit ] + %idxcheck = icmp ne i32 %idx, 30 + %brmerge = and i1 %entry_check, %pr_zero + %brmerge2 = and i1 %brmerge, %idxcheck + br i1 %brmerge2, label %loopexit, label %return2 +loopexit: +; By trivially "partially" unswitching (entry_check AND pr_zero) +; we should always be getting into the true branch only. +; Thus we don't have the same problem as with nontrivial unswitch +; yet checking it does not harm. +; Verifying that select is either on true (in 'true' branch) or on +; non-constant entry_check. 
+; +; CHECK-DAG: select i1 true, i32 0, i32 9 +; CHECK-DAG: select i1 true, i32 0, i32 99 +; CHECK-DAG: select i1 %idxcheck, i32 0, i32 999 +; CHECK-DAG: select i1 %entry_check, i32 0, i32 9 +; CHECK-DAG: select i1 %pr_zero, i32 0, i32 99 +; CHECK-NOT: select i1 false, i32 0, i32 {{9*}} + %mux = select i1 %entry_check, i32 0, i32 9 + %mux2 = select i1 %pr_zero, i32 0, i32 99 + %mux3 = select i1 %idxcheck, i32 0, i32 999 + %idxplus = add nuw nsw i32 %idx, 1 + %check = icmp ugt i32 %idx, 241 + br i1 %check, label %return, label %loop + +return: + ret i32 %mux +return2: + %emux = select i1 %entry_check, i32 0, i32 9 + %emux2 = select i1 %pr_zero, i32 0, i32 99 + ret i32 %emux +}