Index: llvm/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/include/llvm/Target/TargetLowering.h +++ llvm/include/llvm/Target/TargetLowering.h @@ -1008,12 +1008,16 @@ return UseUnderscoreLongJmp; } - /// Return integer threshold on number of blocks to use jump tables rather + /// Return lower limit for number of blocks to use jump tables rather /// than if sequence. - int getMinimumJumpTableEntries() const { + unsigned getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; } + /// Return upper limit for number of blocks to use jump tables rather + /// than if sequence. + unsigned getMaximumJumpTableEntries() const; + /// If a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. unsigned getStackPointerRegisterToSaveRestore() const { @@ -1341,10 +1345,14 @@ /// Indicate the number of blocks to generate jump tables rather than if /// sequence. - void setMinimumJumpTableEntries(int Val) { + void setMinimumJumpTableEntries(unsigned Val) { MinimumJumpTableEntries = Val; } + /// Indicate the maximum number of blocks in jump tables. + /// Set to zero to generate unlimited jump tables. + void setMaximumJumpTableEntries(unsigned); + /// If set to a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. void setStackPointerRegisterToSaveRestore(unsigned R) { @@ -1936,8 +1944,8 @@ /// Defaults to false. bool UseUnderscoreLongJmp; - /// Number of blocks threshold to use jump tables. - int MinimumJumpTableEntries; + /// Lower limit for the number of blocks to use jump tables. + unsigned MinimumJumpTableEntries; /// Information about the contents of the high-bits in boolean values held in /// a type wider than i1. See getBooleanContents. Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8222,7 +8222,7 @@ return NumCases * 100 >= Range * Density; } -static inline bool areJTsAllowed(const TargetLowering &TLI, +static inline bool isJTAllowed(const TargetLowering &TLI, const SwitchInst *SI) { const Function *Fn = SI->getParent()->getParent(); if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") @@ -8321,28 +8321,30 @@ #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI, SI)) - return; - const int64_t N = Clusters.size(); - const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); + + const auto MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + const auto MaxJumpTableSize = OptForSize ? + UINT_MAX : TLI.getMaximumJumpTableEntries(); + + const auto N = Clusters.size(); + if (N < MinJumpTableSize || !isJTAllowed(TLI, SI)) + return; // TotalCases[i]: Total nbr of cases in Clusters[0..i]. SmallVector TotalCases(N); - for (unsigned i = 0; i < N; ++i) { APInt Hi = Clusters[i].High->getValue(); APInt Lo = Clusters[i].Low->getValue(); TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; - if (i != 0) + if (i > 0) TotalCases[i] += TotalCases[i - 1]; } - unsigned MinDensity = JumpTableDensity; - if (DefaultMBB->getParent()->getFunction()->optForSize()) - MinDensity = OptsizeJumpTableDensity; - if (N >= MinJumpTableSize - && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { + unsigned MinDensity = OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; + if (MinJumpTableSize <= N && N <= MaxJumpTableSize && + isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { // Cheap case: the whole range might be suitable for jump table. CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { @@ -8363,65 +8365,74 @@ // order. In the choice between two optimal partitionings, it picks the one // which yields more jump tables. - // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + // MinPartitions[i] is the minimum number of partitions of Clusters[i..N - 1]. SmallVector MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector LastElement(N); - // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. + // NumTables[i] is the number of partitions of Clusters[i..N - 1] whose size + // is greater than the minimum size. SmallVector NumTables(N); - // Base case: There is only one way to partition Clusters[N-1]. + // Base case: There is only one way to partition Clusters[N - 1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; - assert(MinJumpTableSize > 1); NumTables[N - 1] = 0; - // Note: loop indexes are signed to avoid underflow. - for (int64_t i = N - 2; i >= 0; i--) { - // Find optimal partitioning of Clusters[i..N-1]. + assert(1 < MinJumpTableSize && MinJumpTableSize <= MaxJumpTableSize); + for (auto i = N - 1; i > 0; i--) { + auto k = i - 1; + + // Find optimal partitioning of Clusters[i..N - 1]. // Baseline: Put Clusters[i] into a partition on its own. - MinPartitions[i] = MinPartitions[i + 1] + 1; - LastElement[i] = i; - NumTables[i] = NumTables[i + 1]; + MinPartitions[k] = MinPartitions[i] + 1; + LastElement[k] = k; + NumTables[k] = NumTables[i]; // Search for a solution that results in fewer partitions. - for (int64_t j = N - 1; j > i; j--) { + for (auto j = N - 1; j > k; j--) { + auto l = j + 1; + // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { - unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - bool IsTable = j - i + 1 >= MinJumpTableSize; - unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); + if (isDense(Clusters, &TotalCases[0], k, j, MinDensity)) { + unsigned CurPartitions = (l < N) ? MinPartitions[l] : 0; + unsigned NewPartitions = CurPartitions + 1; + + bool MinTable = (l - k) >= MinJumpTableSize; + unsigned MaxTables = (l - i) / MaxJumpTableSize; + unsigned CurTables = (l < N) ? NumTables[l] : 0; + unsigned NewTables = CurTables + MinTable + MaxTables; // If this j leads to fewer partitions, or same number of partitions // with more lookup tables, it is a better partitioning. - if (NumPartitions < MinPartitions[i] || - (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { - MinPartitions[i] = NumPartitions; - LastElement[i] = j; - NumTables[i] = Tables; + if (NewPartitions < MinPartitions[k] || + (NewPartitions == MinPartitions[k] && NewTables > NumTables[k])) { + MinPartitions[k] = NewPartitions; + LastElement[k] = j - MaxTables * MaxJumpTableSize; + NumTables[k] = NewTables; } } } } - // Iterate over the partitions, replacing some with jump tables in-place. - unsigned DstIndex = 0; + // Iterate over the partitions, replacing some with jump tables. + unsigned Count = 0; for (unsigned First = 0, Last; First < N; First = Last + 1) { Last = LastElement[First]; assert(Last >= First); - assert(DstIndex <= First); + assert(Count <= First); unsigned NumClusters = Last - First + 1; + assert(NumClusters <= MaxJumpTableSize); CaseCluster JTCluster; - if (NumClusters >= MinJumpTableSize && + if (MinJumpTableSize <= NumClusters && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { - Clusters[DstIndex++] = JTCluster; + Clusters[Count++] = JTCluster; } else { for (unsigned I = First; I <= Last; ++I) - std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + std::memmove(&Clusters[Count++], &Clusters[I], sizeof(Clusters[I])); } } - Clusters.resize(DstIndex); + Clusters.resize(Count); } bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -44,6 +44,10 @@ cl::desc("Do not create extra branches to split comparison logic."), cl::Hidden); +static cl::opt MaximumJumpTableEntries + ("max-jump-table", cl::init(UINT_MAX), cl::Hidden, + cl::desc("Set maximum number of jump table entries")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. Therefore, the default value N should be @@ -1839,3 +1843,11 @@ Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } + +unsigned TargetLoweringBase::getMaximumJumpTableEntries() const { + return MaximumJumpTableEntries; +} + +void TargetLoweringBase::setMaximumJumpTableEntries(unsigned Val) { + MaximumJumpTableEntries = Val ? Val : UINT_MAX; +} Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -523,6 +523,12 @@ setPrefFunctionAlignment(STI.getPrefFunctionAlignment()); setPrefLoopAlignment(STI.getPrefLoopAlignment()); + // Only change the limit for entries in jump tables if specified by + // the subtarget, but not at the command line. + auto MaxJT = STI.getMaximumJumpTableEntries(); + if (MaxJT && getMaximumJumpTableEntries() == UINT_MAX) + setMaximumJumpTableEntries(MaxJT); + setHasExtractBitsInsn(true); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -91,6 +91,7 @@ unsigned MaxPrefetchIterationsAhead = UINT_MAX; unsigned PrefFunctionAlignment = 0; unsigned PrefLoopAlignment = 0; + unsigned MaxJumpTableEntries = 0; // ReserveX18 - X18 is not available as a general purpose register. bool ReserveX18; @@ -203,6 +204,8 @@ unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } + unsigned getMaximumJumpTableEntries() const { return MaxJumpTableEntries; } + /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. bool supportsAddressTopByteIgnored() const; Index: llvm/lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -26,14 +26,14 @@ #define GET_SUBTARGETINFO_TARGET_DESC #include "AArch64GenSubtargetInfo.inc" -static cl::opt -EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " - "converter pass"), cl::init(true), cl::Hidden); +static cl::opt EnableEarlyIfConvert + ("aarch64-early-ifcvt", cl::init(true), cl::Hidden, + cl::desc("Enable the early if converter pass")); // If OS supports TBI, use this flag to enable it. -static cl::opt -UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " - "an address is ignored"), cl::init(false), cl::Hidden); +static cl::opt UseAddressTopByteIgnored + ("aarch64-use-tbi", cl::init(false), cl::Hidden, + cl::desc("Assume that top byte of an address is ignored")); AArch64Subtarget & AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { @@ -65,6 +65,7 @@ case ExynosM1: PrefFunctionAlignment = 4; PrefLoopAlignment = 3; + MaxJumpTableEntries = 16; break; case Kryo: MaxInterleaveFactor = 4; Index: llvm/test/CodeGen/Generic/max-jump-table.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Generic/max-jump-table.ll @@ -0,0 +1,149 @@ +; RUN: llc %s -O2 -print-machineinstrs -o - 2>%t; FileCheck %s <%t +; RUN: llc %s -O2 -print-machineinstrs -max-jump-table=8 -o - 2>%t; FileCheck %s --check-prefix=CHECK8 <%t +; RUN: llc %s -O2 -print-machineinstrs -max-jump-table=4 -o - 2>%t; FileCheck %s --check-prefix=CHECK4 <%t + +; Function Attrs: nounwind +define i32 @jt(i32 %a, i32 %b) local_unnamed_addr #0 { +entry: + switch i32 %a, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + i32 6, label %sw.bb9 + i32 7, label %sw.bb11 + i32 8, label %sw.bb13 + i32 9, label %sw.bb15 + i32 10, label %sw.bb17 + i32 11, label %sw.bb19 + i32 12, label %sw.bb21 + i32 13, label %sw.bb23 + i32 14, label %sw.bb25 + i32 15, label %sw.bb27 + i32 16, label %sw.bb29 + i32 17, label %sw.bb31 + ] +; CHECK: Jump Tables: +; CHECK-NEXT: jt#0: +; CHECK-NOT: jt#1: +; CHECK8: Jump Tables: +; CHECK8-NEXT: jt#0: +; CHECK8-SAME: jt#1: +; CHECK8-NOT: jt#2: +; CHECK4: Jump Tables: +; CHECK4-NEXT: jt#0: +; CHECK4-SAME: jt#1: +; CHECK4-SAME: jt#2: +; CHECK4-SAME: jt#3: +; CHECK4-NOT: jt#4: + +sw.bb: ; preds = %entry + %call = tail call i32 @ext1(i32 %b) #0 + br label %return + +sw.bb1: ; preds = %entry + %call2 = tail call i32 @ext2(i32 %b) #0 + br label %return + +sw.bb3: ; preds = %entry + %call4 = tail call i32 @ext3(i32 %b) #0 + br label %return + +sw.bb5: ; preds = %entry + %call6 = tail call i32 @ext4(i32 %b) #0 + br label %return + +sw.bb7: ; preds = %entry + %call8 = tail call i32 @ext5(i32 %b) #0 + br label %return + +sw.bb9: ; preds = %entry + %call10 = tail call i32 @ext6(i32 %b) #0 + br label %return + +sw.bb11: ; preds = %entry + %call12 = tail call i32 @ext7(i32 %b) #0 + br label %return + +sw.bb13: ; preds = %entry + %call14 = tail call i32 @ext8(i32 %b) #0 + br label %return + +sw.bb15: ; preds = %entry + %call16 = tail call i32 @ext9(i32 %b) #0 + br label %return + +sw.bb17: ; preds = %entry + %call18 = tail call i32 @ext10(i32 %b) #0 + br label %return + +sw.bb19: ; preds = %entry + %call20 = tail call i32 @ext11(i32 %b) #0 + br label %return + +sw.bb21: ; preds = %entry + %call22 = tail call i32 @ext12(i32 %b) #0 + br label %return + +sw.bb23: ; preds = %entry + %call24 = tail call i32 @ext13(i32 %b) #0 + br label %return + +sw.bb25: ; preds = %entry + %call26 = tail call i32 @ext14(i32 %b) #0 + br label %return + +sw.bb27: ; preds = %entry + %call28 = tail call i32 @ext15(i32 %b) #0 + br label %return + +sw.bb29: ; preds = %entry + %call30 = tail call i32 @ext16(i32 %b) #0 + br label %return + +sw.bb31: ; preds = %entry + %call32 = tail call i32 @ext17(i32 %b) #0 + br label %return + +return: ; preds = %entry, %sw.bb31, %sw.bb29, %sw.bb27, %sw.bb25, %sw.bb23, %sw.bb21, %sw.bb19, %sw.bb17, %sw.bb15, %sw.bb13, %sw.bb11, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb + %retval.0 = phi i32 [ %call32, %sw.bb31 ], [ %call30, %sw.bb29 ], [ %call28, %sw.bb27 ], [ %call26, %sw.bb25 ], [ %call24, %sw.bb23 ], [ %call22, %sw.bb21 ], [ %call20, %sw.bb19 ], [ %call18, %sw.bb17 ], [ %call16, %sw.bb15 ], [ %call14, %sw.bb13 ], [ %call12, %sw.bb11 ], [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @ext1(i32) local_unnamed_addr #0 + +declare i32 @ext2(i32) local_unnamed_addr #0 + +declare i32 @ext3(i32) local_unnamed_addr #0 + +declare i32 @ext4(i32) local_unnamed_addr #0 + +declare i32 @ext5(i32) local_unnamed_addr #0 + +declare i32 @ext6(i32) local_unnamed_addr #0 + +declare i32 @ext7(i32) local_unnamed_addr #0 + +declare i32 @ext8(i32) local_unnamed_addr #0 + +declare i32 @ext9(i32) local_unnamed_addr #0 + +declare i32 @ext10(i32) local_unnamed_addr #0 + +declare i32 @ext11(i32) local_unnamed_addr #0 + +declare i32 @ext12(i32) local_unnamed_addr #0 + +declare i32 @ext13(i32) local_unnamed_addr #0 + +declare i32 @ext14(i32) local_unnamed_addr #0 + +declare i32 @ext15(i32) local_unnamed_addr #0 + +declare i32 @ext16(i32) local_unnamed_addr #0 + +declare i32 @ext17(i32) local_unnamed_addr #0 + +attributes #0 = { nounwind } +