Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -135,6 +135,8 @@ NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); +STATISTIC(NumSwitchWidened, + "Number of switch instructions widened to cover all possible values"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -4842,6 +4844,103 @@ SI->eraseFromParent(); } +// Analyze the switch expression and compute the highest possible value of the +// expression and if known then remap the known default values into individual +// case statements and direct them to the default block. Then mark the default +// destination as unreachable. +static bool widenSwitchRangeToCoverAllValues(SwitchInst *SI, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + unsigned ValueWidth = DL.getTypeSizeInBits(SI->getOperand(0)->getType()); + unsigned OrigValueWidth = ValueWidth; + KnownBits Known(ValueWidth); + llvm::computeKnownBits(SI->getOperand(0), Known, DL); + + bool UnreachableDefault = + isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + + // We only care for non-negative values. The number of minimum leading + // zeros plus the number of maximum trailing ones should be equal to the + // width of the value. If the default is unreachable then we skip this + // optimization. + if (Known.isNegative() || UnreachableDefault || + (Known.countMinLeadingZeros() + Known.countMaxTrailingOnes()) != + ValueWidth) + return false; + + // ValueWidth should be the minimum width needed to represent + // MaxSwitchValue * 100. Since 100 needs 7 bits we add 7 to ValueWidth. + ValueWidth += 7; + + APInt NumCases(ValueWidth, SI->getNumCases()); + APInt MaxSwitchValue = (~Known.Zero).zext(ValueWidth); + APInt Hundred = APInt(ValueWidth, 100); + APInt Seventy = APInt(ValueWidth, 70); + + // At least 70% of the possible values must be defined as case statements + // so that there is no penalty for those which have few defined case + // statements. + if (MaxSwitchValue.ugt(NumCases) && + SI->getNumCases() > 0 && + (NumCases * Hundred).udiv(MaxSwitchValue).uge(Seventy)) { + // Here we not only need to care for those values that are beyond the + // highest case value and up to the MaxSwitchValue but also for the other + // missing values (holes) that appear between the already defined cases. + BasicBlock *DefaultBlock = SI->getDefaultDest(); + assert(SI->case_begin() != SI->case_end()); + SmallVector Cases; + + for (auto Case : SI->cases()) + Cases.push_back(Case.getCaseValue()); + + // The function CasesAreContiguous sorts the vector Cases in descending + // order. + if (CasesAreContiguous(Cases)) { + for (APInt I = (Cases[0]->getValue().zext(ValueWidth) + 1); + I.ule(MaxSwitchValue); I++) { + SI->addCase(ConstantInt::get(DefaultBlock->getContext(), + I.trunc(OrigValueWidth)), DefaultBlock); + if (MaxSwitchValue.ne(I)) { + AddPredecessorToBlock(DefaultBlock, SI->getParent(), SI->getParent()); + } + } + } else { + unsigned int i = 0; + bool SeenFirstDefaultCase = false; + for (APInt I = MaxSwitchValue, E(ValueWidth, -1); I.ne(E); I--) { + if (i >= Cases.size() || + Cases[i]->getValue().zext(ValueWidth) != I) { + SI->addCase(ConstantInt::get(DefaultBlock->getContext(), + I.trunc(OrigValueWidth)), DefaultBlock); + if (SeenFirstDefaultCase) + AddPredecessorToBlock(DefaultBlock, SI->getParent(), SI->getParent()); + else + SeenFirstDefaultCase = true; + } else { + i++; + } + } + } + + // We have already added cases for all the possible values including those + // that would have mapped to the default case and redirected the newly + // added cases to the original default block. Hence, the new default block + // can be marked as unreachable. + BasicBlock *NewDefaultBlock; + NewDefaultBlock = BasicBlock::Create(DefaultBlock->getContext(), + "switch.unreachable.default", + DefaultBlock->getParent()); + UnreachableInst *dummyInst LLVM_ATTRIBUTE_UNUSED; + dummyInst = new UnreachableInst(NewDefaultBlock->getContext(), + NewDefaultBlock); + SI->setDefaultDest(NewDefaultBlock); + ++NumSwitchWidened; + return true; + } + + return false; +} + /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. @@ -5614,6 +5713,11 @@ return requestResimplify(); } + // Try to convert small number of default values to individual case + // statements pointing to the default block. + if (widenSwitchRangeToCoverAllValues(SI, DL, TTI)) + return requestResimplify(); + // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) return requestResimplify(); Index: test/Transforms/SimplifyCFG/switch-widen.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/switch-widen.ll @@ -0,0 +1,92 @@ +; RUN: opt %s -S -simplifycfg | FileCheck %s +@x = external global i32, align 4 +@y = external global i32, align 4 +@z = external global i32, align 4 + +define i32 @foo(i4) { + switch i4 %0, label %default.block [ + i4 0, label %case_0 + i4 -4, label %case_12 + i4 2, label %case_2 + i4 3, label %case_3 + i4 -2, label %case_14 + i4 -1, label %case_15 + i4 6, label %case_6 + i4 7, label %case_7 + i4 -8, label %case_8 + i4 -7, label %case_9 + i4 -6, label %case_10 + i4 -5, label %case_11 + ] + +case_0: ; preds = %1 + store i32 425, i32* @x + br label %return.block + +case_12: ; preds = %1 + store i32 457, i32* @y + br label %return.block + +case_2: ; preds = %1 + store i32 65456, i32* @z + br label %return.block + +case_3: ; preds = %1 + store i32 57, i32* @y + store i32 25, i32* @x + br label %return.block + +case_14: ; preds = %1 + store i32 5456, i32* @z + store i32 45, i32* @x + br label %return.block + +case_15: ; preds = %1 + store i32 42, i32* @x + br label %return.block + +case_6: ; preds = %1 + store i32 47, i32* @y + br label %return.block + +case_7: ; preds = %1 + store i32 6556, i32* @z + br label %return.block + +case_8: ; preds = %1 + store i32 1425, i32* @x + store i32 6546, i32* @z + br label %return.block + +case_9: ; preds = %1 + store i32 1457, i32* @y + store i32 6545, i32* @z + br label %return.block + +case_10: ; preds = %1 + store i32 6456, i32* @z + store i32 2457, i32* @y + br label %return.block + +case_11: ; preds = %1 + store i32 5425, i32* @x + br label %return.block + +default.block: ; preds = %1 + store i32 3409, i32* @z + store i32 3409, i32* @x + br label %return.block + +return.block: ; preds = %default.block, %case_11, %case_10, %case_9, %case_8, %case_7, %case_6, %case_15, %case_14, %case_3, %case_2, %case_12, %case_0 + %2 = phi i32 [ 567, %default.block ], [ 54325, %case_11 ], [ 44325, %case_10 ], [ 34325, %case_9 ], [ 24325, %case_8 ], [ 14325, %case_7 ], [ 4, %case_6 ], [ 43, %case_15 ], [ 432, %case_14 ], [ 5, %case_3 ], [ 25, %case_2 ], [ 325, %case_12 ], [ 4325, %case_0 ] + ret i32 %2 +} +; CHECK-LABEL: @foo( +; CHECK-NEXT: switch i4 %0, label %switch.unreachable.default +; CHECK: i4 -3, label %default.block +; CHECK: i4 5, label %default.block +; CHECK: i4 4, label %default.block +; CHECK: i4 1, label %default.block +; CHECK: default.block: ; preds = %1, %1, %1, %1 +; CHECK: switch.unreachable.default: ; preds = %1 +; CHECK-NEXT: unreachable