Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -317,7 +317,7 @@ unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize) { /// Try to find the estimated number of clusters. Note that the number of - /// clusters identified in this function could be different from the actural + /// clusters identified in this function could be different from the actual /// numbers found in lowering. This function ignore switches that are /// lowered with a mix of jump table / bit test / BTree. This function was /// initially intended to be used when estimating the cost of switch in @@ -363,7 +363,7 @@ (MaxCaseVal - MinCaseVal) .getLimitedValue(std::numeric_limits::max() - 1) + 1; // Check whether a range of clusters is dense enough for a jump table - if (TLI->isSuitableForJumpTable(&SI, N, Range)) { + if (TLI->isSuitableForJumpTable(&SI, N, 0, Range)) { JumpTableSize = Range; return 1; } Index: llvm/include/llvm/CodeGen/SwitchLoweringUtils.h =================================================================== --- llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -221,7 +221,7 @@ Cases(std::move(C)), Prob(Pr) {} }; -/// Return the range of value within a range. +/// Return the range of values within a range. uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, unsigned Last); @@ -229,6 +229,12 @@ uint64_t getJumpTableNumCases(const SmallVectorImpl &TotalCases, unsigned First, unsigned Last); +/// Return the number of unique case targets within a range. +uint64_t getJumpTableNumTargets(const CaseClusterVector &Clusters, + unsigned First, unsigned Last, + bool HasReachableDefault, + uint64_t Cases, uint64_t Range); + struct SwitchWorkListItem { MachineBasicBlock *MBB; CaseClusterIt FirstCluster; Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -1025,8 +1025,10 @@ } /// Return true if lowering to a jump table is suitable for a set of case - /// clusters which may contain \p NumCases cases, \p Range range of values. - virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, + /// clusters which may contain \p NumCases cases, \p Range range of values, + /// \p NumTargets targets. + virtual bool isSuitableForJumpTable(const SwitchInst *SI, + uint64_t NumCases, uint64_t NumTargets, uint64_t Range) const { // FIXME: This function check the maximum table size and density, but the // minimum size is not checked. It would be nice if the minimum size is @@ -1035,14 +1037,14 @@ // getEstimatedNumberOfCaseClusters() in BasicTTIImpl. const bool OptForSize = SI->getParent()->getParent()->hasOptSize(); const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); - const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); - - // Check whether the number of cases is small enough and + const unsigned MaxJumpTableTargets = getMaximumJumpTableTargets(); + + // Check whether the number of targets is small enough and // the range is dense enough for a jump table. - if ((OptForSize || Range <= MaxJumpTableSize) && - (NumCases * 100 >= Range * MinDensity)) { + if ((OptForSize || NumTargets <= MaxJumpTableTargets) && + NumCases * 100 >= Range * MinDensity) return true; - } + return false; } @@ -1545,9 +1547,8 @@ /// Return lower limit of the density in a jump table. unsigned getMinimumJumpTableDensity(bool OptForSize) const; - /// Return upper limit for number of entries in a jump table. - /// Zero if no limit. - unsigned getMaximumJumpTableSize() const; + /// Return upper limit for number of targets in a jump table. + unsigned getMaximumJumpTableTargets() const; virtual bool isJumpTableRelative() const { return TM.isPositionIndependent(); @@ -1965,9 +1966,8 @@ /// Indicate the minimum number of blocks to generate jump tables. void setMinimumJumpTableEntries(unsigned Val); - /// Indicate the maximum number of entries in jump tables. - /// Set to zero to generate unlimited jump tables. - void setMaximumJumpTableSize(unsigned); + /// Indicate the maximum number of targets in jump tables. + void setMaximumJumpTableTargets(unsigned); /// If set to a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. Index: llvm/lib/CodeGen/SwitchLoweringUtils.cpp =================================================================== --- llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" @@ -40,6 +41,20 @@ return NumCases; } +uint64_t +SwitchCG::getJumpTableNumTargets(const CaseClusterVector &Clusters, + unsigned First, unsigned Last, + bool HasReachableDefault, + uint64_t Cases, uint64_t Range) { + assert(Last >= First); + SmallSet Targets; + + for (unsigned i = First; i <= Last; i++) + Targets.insert(Clusters[i].MBB); + + return Targets.size() + (HasReachableDefault && Range > Cases); +} + void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, MachineBasicBlock *DefaultMBB) { @@ -74,13 +89,18 @@ TotalCases[i] += TotalCases[i - 1]; } - uint64_t Range = getJumpTableRange(Clusters,0, N - 1); + const bool HasReachableDefault = + !isa(DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); + uint64_t Range = getJumpTableRange(Clusters, 0, N - 1); uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); + uint64_t NumTargets = getJumpTableNumTargets(Clusters, 0, N - 1, + HasReachableDefault, + NumCases, Range); assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); // Cheap case: the whole range may be suitable for jump table. - if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) { + if (TLI->isSuitableForJumpTable(SI, NumCases, NumTargets, Range)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -135,10 +155,12 @@ // Try building a partition from Clusters[i..j]. Range = getJumpTableRange(Clusters, i, j); NumCases = getJumpTableNumCases(TotalCases, i, j); + NumTargets = getJumpTableNumTargets(Clusters, i, j, HasReachableDefault, + NumCases, Range); assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); - if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) { + if (TLI->isSuitableForJumpTable(SI, NumCases, NumTargets, Range)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -72,9 +72,9 @@ ("min-jump-table-entries", cl::init(4), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table.")); -static cl::opt MaximumJumpTableSize - ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden, - cl::desc("Set maximum size of jump tables.")); +static cl::opt MaximumJumpTableTargets + ("max-jump-table-targets", cl::init(UINT_MAX), cl::Hidden, + cl::desc("Set maximum number of targets to use in a jump table.")); /// Minimum jump table density for normal functions. static cl::opt @@ -1761,16 +1761,16 @@ MinimumJumpTableEntries = Val; } -unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { - return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; +unsigned TargetLoweringBase::getMaximumJumpTableTargets() const { + return MaximumJumpTableTargets; } -unsigned TargetLoweringBase::getMaximumJumpTableSize() const { - return MaximumJumpTableSize; +void TargetLoweringBase::setMaximumJumpTableTargets(unsigned Val) { + MaximumJumpTableTargets = Val; } -void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { - MaximumJumpTableSize = Val; +unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { + return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; } //===----------------------------------------------------------------------===// Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -641,11 +641,10 @@ setPrefFunctionAlignment(STI.getPrefFunctionAlignment()); setPrefLoopAlignment(STI.getPrefLoopAlignment()); - // Only change the limit for entries in a jump table if specified by + // Only change the limit for targets in a jump table if specified by // the sub target, but not at the command line. - unsigned MaxJT = STI.getMaximumJumpTableSize(); - if (MaxJT && getMaximumJumpTableSize() == UINT_MAX) - setMaximumJumpTableSize(MaxJT); + if (getMaximumJumpTableTargets() == UINT_MAX) + setMaximumJumpTableTargets(STI.getMaximumJumpTableTargets()); setHasExtractBitsInsn(true); Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -200,7 +200,7 @@ unsigned MaxPrefetchIterationsAhead = UINT_MAX; unsigned PrefFunctionAlignment = 0; unsigned PrefLoopAlignment = 0; - unsigned MaxJumpTableSize = 0; + unsigned MaxJumpTableTargets = UINT_MAX; unsigned WideningBaseCost = 0; // ReserveXRegister[i] - X#i is not available as a general purpose register. @@ -362,7 +362,7 @@ unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } - unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } + unsigned getMaximumJumpTableTargets() const { return MaxJumpTableTargets; } unsigned getWideningBaseCost() const { return WideningBaseCost; } Index: llvm/lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -95,13 +95,13 @@ break; case ExynosM1: MaxInterleaveFactor = 4; - MaxJumpTableSize = 8; + MaxJumpTableTargets = 8; PrefFunctionAlignment = 4; PrefLoopAlignment = 3; break; case ExynosM3: MaxInterleaveFactor = 4; - MaxJumpTableSize = 20; + MaxJumpTableTargets = 20; PrefFunctionAlignment = 5; PrefLoopAlignment = 4; break; Index: llvm/test/CodeGen/AArch64/max-jump-table.ll =================================================================== --- llvm/test/CodeGen/AArch64/max-jump-table.ll +++ llvm/test/CodeGen/AArch64/max-jump-table.ll @@ -1,9 +1,9 @@ -; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t -; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t -; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t -; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t -; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t -; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-targets=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-targets=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-targets=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t declare void @ext(i32, i32) @@ -86,11 +86,11 @@ ; CHECK0-NOT: %jump-table.1: ; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} ; CHECK4-NOT: %jump-table.1: -; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} +; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} ; CHECK8-NOT: %jump-table.1: ; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} ; CHECK16-NOT: %jump-table.1: -; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} +; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} ; CHECKM1-NOT: %jump-table.1: ; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} ; CHECKM3-NOT: %jump-table.1: @@ -102,6 +102,7 @@ bb4: tail call void @ext(i32 3, i32 4) br label %return bb5: tail call void @ext(i32 2, i32 5) br label %return bb6: tail call void @ext(i32 1, i32 6) br label %return + return: ret void } @@ -131,14 +132,13 @@ ; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 ; CHECK4-NEXT: %jump-table.1: %bb.5 %bb.6 %bb.7 %bb.8 ; CHECK4-NOT: %jump-table.2: -; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 -; CHECK8-NEXT: %jump-table.1: %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 +; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 +; CHECK8-NEXT: %jump-table.1: %bb.8 %bb.13 %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 ; CHECK8-NOT: %jump-table.2: -; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 -; CHECK16-NEXT: %jump-table.1: %bb.8 %bb.13 %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 -; CHECK16-NOT: %jump-table.2: -; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 -; CHECKM1-NEXT: %jump-table.1: %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 +; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 +; CHECK16-NOT: %jump-table.1: +; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 +; CHECKM1-NEXT: %jump-table.1: %bb.8 %bb.13 %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 ; CHECKM1-NOT: %jump-table.2: ; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 ; CHECKM3-NOT: %jump-table.1: @@ -185,15 +185,15 @@ ; CHECK0-NOT: %jump-table.1: ; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 ; CHECK4-NEXT: %jump-table.1: %bb.5 %bb.6 %bb.7 %bb.8 -; CHECK4-NOT: %jump-table.2: -; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 -; CHECK8-NEXT: %jump-table.1: %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 +; CHECK4-NEXT: %jump-table.2: %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 +; CHECK4-NOT: %jump-table.3: +; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 %bb.8 +; CHECK8-NEXT: %jump-table.1: %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 ; CHECK8-NOT: %jump-table.2: -; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 -; CHECK16-NEXT: %jump-table.1: %bb.8 %bb.13 %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 -; CHECK16-NOT: %jump-table.2: -; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 -; CHECKM1-NEXT: %jump-table.1: %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 +; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 +; CHECK16-NOT: %jump-table.1: +; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 %bb.8 +; CHECKM1-NEXT: %jump-table.1: %bb.9 %bb.10 %bb.13 %bb.11 %bb.12 ; CHECKM1-NOT: %jump-table.2: ; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.13 %bb.5 %bb.6 %bb.7 %bb.8 %bb.13 %bb.9 %bb.10 ; CHECKM3-NOT: %jump-table.1: