Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -80,6 +80,7 @@ struct LoopProperties { unsigned CanBeUnswitchedCount; + unsigned WasUnswitchedCount; unsigned SizeEstimation; UnswitchedValsMap UnswitchedVals; }; @@ -119,6 +120,10 @@ // Check was this case value unswitched before or not. bool isUnswitched(const SwitchInst *SI, const Value *V); + // Returns true if another unswitching could be done within the cost + // threshold. + bool CostAllowsUnswitching(); + // Clone all loop-unswitch related loop properties. // Redistribute unswitching quotas. // Note, that new loop data is stored inside the VMap. @@ -248,6 +253,7 @@ Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); + Props.WasUnswitchedCount = 0; MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; if (Metrics.notDuplicatable) { @@ -258,13 +264,6 @@ } } - if (!Props.CanBeUnswitchedCount) { - DEBUG(dbgs() << "NOT unswitching loop %" - << L->getHeader()->getName() << ", cost too high: " - << L->getBlocks().size() << "\n"); - return false; - } - // Be careful. This links are good only before new loop addition. CurrentLoopProperties = &Props; CurLoopInstructions = &Props.UnswitchedVals; @@ -279,7 +278,8 @@ if (LIt != LoopsProperties.end()) { LoopProperties &Props = LIt->second; - MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation; + MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) + * Props.SizeEstimation; LoopsProperties.erase(LIt); } @@ -299,6 +299,10 @@ return (*CurLoopInstructions)[SI].count(V); } +bool LUAnalysisCache::CostAllowsUnswitching() { + return CurrentLoopProperties->CanBeUnswitchedCount > 0; +} + // Clone all loop-unswitch related loop properties. // Redistribute unswitching quotas. // Note, that new loop data is stored inside the VMap. @@ -312,6 +316,8 @@ // Reallocate "can-be-unswitched quota" --OldLoopProps.CanBeUnswitchedCount; + ++OldLoopProps.WasUnswitchedCount; + NewLoopProps.WasUnswitchedCount = 0; unsigned Quota = OldLoopProps.CanBeUnswitchedCount; NewLoopProps.CanBeUnswitchedCount = Quota / 2; OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; @@ -656,6 +662,14 @@ } // Check to see if it would be profitable to unswitch current loop. + if (!BranchesInfo.CostAllowsUnswitching()) { + DEBUG(dbgs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() + << " at non-trivial condition '" + << *Val << "' == " << *LoopCond << "\n" + << ". Cost too high.\n"); + return false; + } // Do not do non-trivial unswitch while optimizing for size. if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize)) Index: test/Transforms/LoopUnswitch/2015-06-10-Threshold.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnswitch/2015-06-10-Threshold.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -loop-unswitch --loop-unswitch-threshold 4 \ +; RUN: -verify-loop-info -S < %s 2>&1 -stats | FileCheck %s + +; Test for a bug. This tests that loop unswitching decisions in +; different functions are independent. This was previously not the +; case; the threshold of 4 would in effect be set to 0 after the first +; unswitching and carried over as 0 into the next function. + +; CHECK-LABEL: @test1( +define i32 @test1(i32* %var) { +entry: +; The loop should be unswitched on %c == true or %c == false, producing +; two versions of the loop. The unswitch cost is 4. + %c = icmp eq i32 1, 2 + br label %loop + +loop: + %i = phi i32 [0, %entry], [%nexti, %loop] + %incby = select i1 %c, i32 10, i32 20 +; CHECK-DAG: add i32 %i.us, 10 +; CHECK-DAG: add i32 %i, 20 + %nexti = add i32 %i, %incby + %iterate = icmp eq i32 %nexti, 40 + br i1 %iterate, label %loop, label %loop_exit + +; CHECK: loop_exit: +loop_exit: + ret i32 0 +} + +; test2 is identical to test1, so the same thing should happen here as there. +; CHECK-LABEL: @test2( +define i32 @test2(i32* %var) { +entry: + %c = icmp eq i32 1, 2 + br label %loop + +loop: + %i = phi i32 [0, %entry], [%nexti, %loop] + %incby = select i1 %c, i32 10, i32 20 +; CHECK-DAG: add i32 %i.us, 10 +; CHECK-DAG: add i32 %i, 20 + %nexti = add i32 %i, %incby + %iterate = icmp eq i32 %nexti, 40 + br i1 %iterate, label %loop, label %loop_exit + +; CHECK: loop_exit: +loop_exit: + ret i32 0 +} + + +; CHECK: Statistics Collected +; CHECK: 2 loop-unswitch - Number of selects unswitched Index: test/Transforms/LoopUnswitch/2015-06-10-Trivial.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnswitch/2015-06-10-Trivial.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -loop-unswitch --loop-unswitch-threshold 4 \ +; RUN: -verify-loop-info -S < %s 2>&1 -stats | FileCheck %s + +; Test for a bug. This tests that trivial unswitching happens even if +; the computed unswitching cost is above the threshold and that +; unswitching other loops in the same function does not prevent +; unswitching on trivial conditions. + +define i32 @test(i32* %var) { +; CHECK-LABEL: @test( +; The first loop should be unswitched trivially on %c1 == false, leaving +; one version of the loop with %c1 == true and no version for %c1 == false. +; The unswitch cost is 5, above the threshold, but that should not matter +; for trivial unswitching. + %c1 = icmp eq i32 1, 1 +; CHECK: br i1 %c1 + br label %loop1 + +loop1: +; CHECK-NOT: br i1 false +; CHECK: br i1 true +; CHECK-NOT: br i1 false + %dummy1 = add i32 1, 1 ; to make the unswitch cost 5 + %dummy2 = add i32 1, 1 + %dummy3 = add i32 1, 1 + %dummy4 = add i32 1, 1 + br i1 %c1, label %loop1, label %loop1_exit + +; CHECK: loop1_exit: +loop1_exit: +; The second loop should be unswitched non-trivially on %c2 == true or +; %c2 == false, producing two versions of the loop. The unswitch cost is 4. + %c2 = icmp eq i32 1, 2 + br label %loop2 + +loop2: + %i = phi i32 [0, %loop1_exit], [%nexti, %loop2] + %incby = select i1 %c2, i32 10, i32 20 +; CHECK-DAG: add i32 %i.us, 10 +; CHECK-DAG: add i32 %i, 20 + %nexti = add i32 %i, %incby + %iterate = icmp eq i32 %nexti, 40 + br i1 %iterate, label %loop2, label %loop2_exit + +; CHECK: loop2_exit: +loop2_exit: + %c3 = icmp eq i32 1, 1 + br label %loop3 + +; The third loop is trivially unswitchable like the first one, the only +; difference is that the unswitchable condition is %c3 == true, not false. +loop3: +; CHECK-NOT: br i1 true +; CHECK: br i1 false +; CHECK-NOT: br i1 true + %dummy5 = add i32 1, 1 ; to make the unswitch cost 5 + %dummy6 = add i32 1, 1 + %dummy7 = add i32 1, 1 + %dummy8 = add i32 1, 1 + br i1 %c3, label %loop3_exit, label %loop3 + +; CHECK: loop3_exit: +loop3_exit: + ret i32 0 +} + +; CHECK: Statistics Collected +; CHECK: 2 loop-unswitch - Number of unswitches that are trivial \ No newline at end of file