Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -4968,6 +4968,112 @@ return true; } +static bool isSwitchDense(ArrayRef Values) { + // See also SelectionDAGBuilder::isDense(), which this function was based on. + uint64_t Diff; + if (Values.front() <= Values.back()) + Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + else + Diff = (uint64_t)Values.front() - (uint64_t)Values.back(); + uint64_t Range = Diff + 1; + uint64_t NumCases = Values.size(); + // 40% is the default density for building a jump table in optsize/minsize mode. + uint64_t MinDensity = 40; + + return NumCases * 100 >= Range * MinDensity; +} + +// Try and transform a switch that has "holes" in it to a contiguous sequence +// of cases. +// +// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +// +// This converts a sparse switch into a dense switch which allows better +// lowering and could also allow transforming into a lookup table. +static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + if (SI->getCondition()->getType()->getIntegerBitWidth() > 64) + return false; + + // Only bother with this optimization if it can cause a good lowering of dense + // switches (for example a jump table) to be emitted. + // FIXME: Add a hook to TTI to query this, like TLI->getMinimumJumpTableEntries()? + if (SI->getNumCases() < 4) + return false; + + // This transform is agnostic to the signedness of the input or case values. We + // can treat the case values as signed or unsigned. We can optimize more common + // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values + // as signed. + SmallVector Values; + for (auto &C : SI->cases()) + Values.push_back(C.getCaseValue()->getValue().getSExtValue()); + std::sort(Values.begin(), Values.end()); + + // If the switch is already dense, there's nothing useful to do here. + if (isSwitchDense(Values)) + return false; + + // First, transform the values such that they start at zero and ascend. + uint64_t Base = Values[0]; + for (auto &V : Values) + V -= Base; + + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + uint64_t GCD = 0; + for (auto &V : Values) + GCD = llvm::GreatestCommonDivisor64(GCD, (uint64_t)V); + if (GCD <= 1 || !llvm::isPowerOf2_64(GCD)) + // We refuse to output a costly urem/udiv pair. + return false; + unsigned Shift = llvm::Log2_64(GCD); + + for (auto &V : Values) + V = (int64_t)((uint64_t)V >> Shift); + + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. + return false; + + // We can range-reduce this switch (X) to switch (X - Base) / Divisor. + // This should be profitable if the resulting switch has no holes - + // this should result in a better lowering. + auto *Ty = SI->getCondition()->getType(); + Builder.SetInsertPoint(SI); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *Div = Builder.CreateLShr(Sub, ConstantInt::get(Ty, Shift)); + auto *Rem = Builder.CreateAnd(Sub, ConstantInt::get(Ty, GCD-1)); + auto *Cmp = Builder.CreateICmpEQ(Rem, ConstantInt::get(Ty, 0)); + SI->replaceUsesOfWith(SI->getCondition(), Div); + + BasicBlock *OldBB = SI->getParent(); + SplitBlock(OldBB, SI); + OldBB->getTerminator()->eraseFromParent(); + BranchInst::Create(SI->getParent(), SI->getDefaultDest(), Cmp, OldBB); + + // Correct any PHIs at the destination. + for (auto &I : *SI->getDefaultDest()) { + auto *PN = dyn_cast(&I); + if (!PN) + break; + auto *V = PN->getIncomingValueForBlock(SI->getParent()); + PN->addIncoming(V, OldBB); + } + + for (auto &C : SI->cases()) { + auto *Orig = C.getCaseValue(); + auto BW = Orig->getValue().getBitWidth(); + auto Sub = Orig->getValue() - APInt(BW, Base); + auto *New = ConstantInt::get(Ty, Sub.lshr(APInt(BW, Shift))); + SI->replaceUsesOfWith(Orig, New); + } + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -5011,6 +5117,9 @@ if (SwitchToLookupTable(SI, Builder, DL, TTI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return false; } Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -0,0 +1,253 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +; CHECK-LABEL: @test1 +; CHECK: %1 = sub i32 %a, 97 +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = and i32 %1, 3 +; CHECK: %4 = icmp eq i32 %3, 0 +; CHECK: br i1 %4, label %.split, label %def +; CHECK:.split: +; CHECK: switch i32 %2, label %def [ +; CHECK: i32 0, label %one +; CHECK: i32 1, label %two +; CHECK: i32 2, label %three +; CHECK: i32 3, label %three +; CHECK: ] +; CHECK: phi i32 [ 8867, %.split ], [ 11984, %one ], [ 1143, %two ], [ 99783, %three ], [ 8867, %0 ] +define i32 @test1(i32 %a) { + switch i32 %a, label %def [ + i32 97, label %one + i32 101, label %two + i32 105, label %three + i32 109, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} + +; Optimization shouldn't trigger; bitwidth > 64 +; CHECK-LABEL: @test2 +; CHECK: switch i128 %a, label %def +define i128 @test2(i128 %a) { + switch i128 %a, label %def [ + i128 97, label %one + i128 101, label %two + i128 105, label %three + i128 109, label %three + ] + +def: + ret i128 8867 + +one: + ret i128 11984 +two: + ret i128 1143 +three: + ret i128 99783 +} + + +; Optimization shouldn't trigger; no holes present +; CHECK-LABEL: @test3 +; CHECK: switch i32 %a, label %def +define i32 @test3(i32 %a) { + switch i32 %a, label %def [ + i32 97, label %one + i32 98, label %two + i32 99, label %three + i32 100, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} + +; Optimization shouldn't trigger; not an arithmetic progression +; CHECK-LABEL: @test4 +; CHECK: switch i32 %a, label %def +define i32 @test4(i32 %a) { + switch i32 %a, label %def [ + i32 97, label %one + i32 102, label %two + i32 105, label %three + i32 109, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} + +; Optimization shouldn't trigger; not a power of two +; CHECK-LABEL: @test5 +; CHECK: switch i32 %a, label %def +define i32 @test5(i32 %a) { + switch i32 %a, label %def [ + i32 97, label %one + i32 102, label %two + i32 107, label %three + i32 112, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} + +; CHECK-LABEL: @test6 +; CHECK: %1 = sub i32 %a, -109 +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = and i32 %1, 3 +; CHECK: %4 = icmp eq i32 %3, 0 +; CHECK: br i1 %4, label %.split, label %def +; CHECK:.split: +; CHECK: switch i32 %2, label %def [ +; CHECK: i32 3, label %one +; CHECK: i32 2, label %two +; CHECK: i32 1, label %three +; CHECK: i32 0, label %three +; CHECK: ] +; CHECK: phi i32 [ 8867, %.split ], [ 11984, %one ], [ 1143, %two ], [ 99783, %three ], [ 8867, %0 ] +define i32 @test6(i32 %a) { + switch i32 %a, label %def [ + i32 -97, label %one + i32 -101, label %two + i32 -105, label %three + i32 -109, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} + +; CHECK-LABEL: @test7 +; CHECK: %1 = sub i8 %a, -36 +; CHECK: %2 = lshr i8 %1, 2 +; CHECK: %3 = and i8 %1, 3 +; CHECK: %4 = icmp eq i8 %3, 0 +; CHECK: br i1 %4, label %.split, label %def +; CHECK:.split: +; CHECK: switch i8 %2, label %def [ +; CHECK: i8 0, label %one +; CHECK: i8 1, label %two +; CHECK: i8 2, label %three +; CHECK: i8 3, label %three +; CHECK: ] +define i8 @test7(i8 %a) { + switch i8 %a, label %def [ + i8 220, label %one + i8 224, label %two + i8 228, label %three + i8 232, label %three + ] + +def: + ret i8 8867 + +one: + ret i8 11984 +two: + ret i8 1143 +three: + ret i8 99783 +} + +; CHECK-LABEL: @test8 +; CHECK: %1 = sub i8 %a, -3 +; CHECK: %2 = lshr i8 %1, 2 +; CHECK: %3 = and i8 %1, 3 +; CHECK: %4 = icmp eq i8 %3, 0 +; CHECK: br i1 %4, label %.split, label %def +; CHECK:.split: +; CHECK: switch i8 %2, label %def [ +; CHECK: i8 0, label %one +; CHECK: i8 1, label %two +; CHECK: i8 2, label %three +; CHECK: i8 3, label %three +; CHECK: ] +define i8 @test8(i8 %a) { + switch i8 %a, label %def [ + i8 -3, label %one + i8 1, label %two + i8 5, label %three + i8 9, label %three + ] + +def: + ret i8 8867 + +one: + ret i8 11984 +two: + ret i8 1143 +three: + ret i8 99783 +} + +; CHECK-LABEL: @test9 +; CHECK: %1 = sub i32 %a, 97 +; CHECK: %2 = lshr i32 %1, 2 +; CHECK: %3 = and i32 %1, 3 +; CHECK: %4 = icmp eq i32 %3, 0 +; CHECK: br i1 %4, label %.split, label %def +; CHECK:.split: +; CHECK: switch i32 %2, label %def [ +; CHECK: i32 0, label %one +; CHECK: i32 1, label %two +; CHECK: i32 2, label %three +; CHECK: i32 4, label %three +; CHECK: ] +; CHECK: phi i32 [ 8867, %.split ], [ 11984, %one ], [ 1143, %two ], [ 99783, %three ], [ 8867, %0 ] +define i32 @test9(i32 %a) { + switch i32 %a, label %def [ + i32 97, label %one + i32 101, label %two + i32 105, label %three + i32 113, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +}