Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5490,17 +5490,6 @@ return true; } -static bool isSwitchDense(ArrayRef Values) { - // See also SelectionDAGBuilder::isDense(), which this function was based on. - uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); - uint64_t Range = Diff + 1; - uint64_t NumCases = Values.size(); - // 40% is the default density for building a jump table in optsize/minsize mode. - uint64_t MinDensity = 40; - - return NumCases * 100 >= Range * MinDensity; -} - /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// @@ -5512,6 +5501,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { + bool MadeChanges = false; auto *CondTy = cast(SI->getCondition()->getType()); if (CondTy->getIntegerBitWidth() > 64 || !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) @@ -5521,49 +5511,53 @@ if (SI->getNumCases() < 4) return false; - // This transform is agnostic to the signedness of the input or case values. We - // can treat the case values as signed or unsigned. We can optimize more common - // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values - // as signed. - SmallVector Values; + SmallVector Values; for (auto &C : SI->cases()) - Values.push_back(C.getCaseValue()->getValue().getSExtValue()); + Values.push_back(C.getCaseValue()->getLimitedValue()); llvm::sort(Values); - // If the switch is already dense, there's nothing useful to do here. - if (isSwitchDense(Values)) - return false; - - // First, transform the values such that they start at zero and ascend. - int64_t Base = Values[0]; - for (auto &V : Values) - V -= (uint64_t)(Base); + // Find the element that has the most distance from it's previous, wrapping around. + uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - + Values.back() + Values.front() + 1; + unsigned BestIndex = 0; + for (unsigned i = 1;i != Values.size();i++) { + if (Values[i] - Values[i-1] > BestDistance) { + BestIndex = i; + BestDistance = Values[i] - Values[i-1]; + } + } - // Now we have signed numbers that have been shifted so that, given enough - // precision, there are no negative values. Since the rest of the transform - // is bitwise only, we switch now to an unsigned representation. - uint64_t GCD = 0; - for (auto &V : Values) - GCD = GreatestCommonDivisor64(GCD, (uint64_t)V); + uint64_t Base = Values[BestIndex]; + // Now transform the values such that they start at zero and ascend. + if ((BestDistance > (SI->getFunction()->hasOptSize() ? 2 : 8)) && + (BestIndex != 0 || Values[0] != 0)) { + MadeChanges = true; + for (auto &V : Values) + V -= Base; + } // This transform can be done speculatively because it is so cheap - it results - // in a single rotate operation being inserted. This can only happen if the - // factor extracted is a power of 2. - // FIXME: If the GCD is an odd number we can multiply by the multiplicative - // inverse of GCD and then perform this transform. + // in a single rotate operation being inserted. // FIXME: It's possible that optimizing a switch on powers of two might also // be beneficial - flag values are often powers of two and we could use a CLZ // as the key function. - if (GCD <= 1 || !isPowerOf2_64(GCD)) - // No common divisor found or too expensive to compute key function. - return false; - - unsigned Shift = Log2_64(GCD); - for (auto &V : Values) - V = (int64_t)((uint64_t)V >> Shift); + unsigned Shift = 64; + for (auto &V : Values) { + unsigned TZ = APInt(64, V ^ Values[0]).countTrailingZeros(); + if (TZ < Shift) { + Shift = TZ; + if (Shift == 0) + break; + } + } + if (Shift) { + MadeChanges = true; + for (auto &V : Values) + V = V >> Shift; + } - if (!isSwitchDense(Values)) - // Transform didn't create a dense switch. + if (!MadeChanges) + // Didn't do anything return false; // The obvious transform is to shift the switch condition right and emit a @@ -5577,19 +5571,19 @@ // default case. auto *Ty = cast(SI->getCondition()->getType()); + Function *Fshr = Intrinsic::getDeclaration(SI->getModule(), Intrinsic::fshr, Ty); Builder.SetInsertPoint(SI); auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); - auto *LShr = Builder.CreateLShr(Sub, ShiftC); - auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); - auto *Rot = Builder.CreateOr(LShr, Shl); - SI->replaceUsesOfWith(SI->getCondition(), Rot); + uint64_t CommonRightBits = (Shift == 0) ? 0 : (Values[0] << (64 - Shift)) >> (64 - Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base + CommonRightBits)); + auto *Rotr = Builder.Insert(CallInst::Create(Fshr, {Sub, Sub, ShiftC})); + SI->replaceUsesOfWith(SI->getCondition(), Rotr); for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); Case.setValue( - cast(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + cast(ConstantInt::get(Ty, Sub.lshr(Shift)))); } return true; } @@ -5630,6 +5624,9 @@ if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + // The conversion from switch to lookup tables results in difficult-to-analyze // code and makes pruning branches much harder. This is a problem if the // switch expression itself can still be restricted as a result of inlining or @@ -5639,9 +5636,6 @@ SwitchToLookupTable(SI, Builder, DL, TTI)) return requestResimplify(); - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - return false; } Index: test/Transforms/SimplifyCFG/switch-simplify-range.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/switch-simplify-range.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -simplifycfg < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i64 @switch_common_right_bits(i8 %a) #0 { +; CHECK-LABEL: @switch_common_right_bits( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[A:%.*]], -3 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP0]], i8 [[TMP0]], i8 1) +; CHECK-NEXT: switch i8 [[TMP1]], label [[SWITCHELSE:%.*]] [ +; CHECK-NEXT: i8 0, label [[SWITCHPRONG:%.*]] +; CHECK-NEXT: i8 1, label [[SWITCHPRONG1:%.*]] +; CHECK-NEXT: i8 2, label [[SWITCHPRONG2:%.*]] +; CHECK-NEXT: i8 3, label [[SWITCHPRONG3:%.*]] +; CHECK-NEXT: ] +; CHECK: SwitchElse: +; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ 10, [[ENTRY:%.*]] ], [ 6, [[SWITCHPRONG]] ], [ 3, [[SWITCHPRONG1]] ], [ 3, [[SWITCHPRONG2]] ], [ 3, [[SWITCHPRONG3]] ] +; CHECK-NEXT: ret i64 [[MERGE]] +; CHECK: SwitchProng: +; CHECK-NEXT: br label [[SWITCHELSE]] +; CHECK: SwitchProng1: +; CHECK-NEXT: br label [[SWITCHELSE]] +; CHECK: SwitchProng2: +; CHECK-NEXT: br label [[SWITCHELSE]] +; CHECK: SwitchProng3: +; CHECK-NEXT: br label [[SWITCHELSE]] +; +Entry: + switch i8 %a, label %SwitchElse [ + i8 253, label %SwitchProng + i8 255, label %SwitchProng1 + i8 1, label %SwitchProng2 + i8 3, label %SwitchProng3 + ] +SwitchElse: ; preds = %Entry + ret i64 10 +SwitchProng: ; preds = %Entry + ret i64 6 +SwitchProng1: ; preds = %Entry + ret i64 3 +SwitchProng2: ; preds = %Entry + ret i64 3 +SwitchProng3: ; preds = %Entry + ret i64 3 +} +