@@ -54,11 +54,6 @@ static cl::opt<int>
54
54
cl::init (45 ),
55
55
cl::desc(" Threshold for inlining cold callsites" ));
56
56
57
- static cl::opt<bool >
58
- EnableGenericSwitchCost (" inline-generic-switch-cost" , cl::Hidden,
59
- cl::init (false ),
60
- cl::desc(" Enable generic switch cost model" ));
61
-
62
57
// We introduce this threshold to help performance of instrumentation based
63
58
// PGO before we actually hook up inliner with analysis passes such as BPI and
64
59
// BFI.
@@ -1015,83 +1010,68 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1015
1010
if (isa<ConstantInt>(V))
1016
1011
return true ;
1017
1012
1018
- if (EnableGenericSwitchCost) {
1019
- // Assume the most general case where the swith is lowered into
1020
- // either a jump table, bit test, or a balanced binary tree consisting of
1021
- // case clusters without merging adjacent clusters with the same
1022
- // destination. We do not consider the switches that are lowered with a mix
1023
- // of jump table/bit test/binary search tree. The cost of the switch is
1024
- // proportional to the size of the tree or the size of jump table range.
1025
-
1026
- // Exit early for a large switch, assuming one case needs at least one
1027
- // instruction.
1028
- // FIXME: This is not true for a bit test, but ignore such case for now to
1029
- // save compile-time.
1030
- int64_t CostLowerBound =
1031
- std::min ((int64_t )INT_MAX,
1032
- (int64_t )SI.getNumCases () * InlineConstants::InstrCost + Cost);
1033
-
1034
- if (CostLowerBound > Threshold) {
1035
- Cost = CostLowerBound;
1036
- return false ;
1037
- }
1013
+ // Assume the most general case where the swith is lowered into
1014
+ // either a jump table, bit test, or a balanced binary tree consisting of
1015
+ // case clusters without merging adjacent clusters with the same
1016
+ // destination. We do not consider the switches that are lowered with a mix
1017
+ // of jump table/bit test/binary search tree. The cost of the switch is
1018
+ // proportional to the size of the tree or the size of jump table range.
1019
+ //
1020
+ // NB: We convert large switches which are just used to initialize large phi
1021
+ // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1022
+ // inlining those. It will prevent inlining in cases where the optimization
1023
+ // does not (yet) fire.
1038
1024
1039
- unsigned JumpTableSize = 0 ;
1040
- unsigned NumCaseCluster =
1041
- TTI.getEstimatedNumberOfCaseClusters (SI, JumpTableSize);
1025
+ // Exit early for a large switch, assuming one case needs at least one
1026
+ // instruction.
1027
+ // FIXME: This is not true for a bit test, but ignore such case for now to
1028
+ // save compile-time.
1029
+ int64_t CostLowerBound =
1030
+ std::min ((int64_t )INT_MAX,
1031
+ (int64_t )SI.getNumCases () * InlineConstants::InstrCost + Cost);
1042
1032
1043
- // If suitable for a jump table, consider the cost for the table size and
1044
- // branch to destination.
1045
- if (JumpTableSize) {
1046
- int64_t JTCost = (int64_t )JumpTableSize * InlineConstants::InstrCost +
1047
- 4 * InlineConstants::InstrCost;
1048
- Cost = std::min ((int64_t )INT_MAX, JTCost + Cost);
1049
- return false ;
1050
- }
1033
+ if (CostLowerBound > Threshold) {
1034
+ Cost = CostLowerBound;
1035
+ return false ;
1036
+ }
1051
1037
1052
- // Considering forming a binary search, we should find the number of nodes
1053
- // which is same as the number of comparisons when lowered. For a given
1054
- // number of clusters, n, we can define a recursive function, f(n), to find
1055
- // the number of nodes in the tree. The recursion is :
1056
- // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1057
- // and f(n) = n, when n <= 3.
1058
- // This will lead a binary tree where the leaf should be either f(2) or f(3)
1059
- // when n > 3. So, the number of comparisons from leaves should be n, while
1060
- // the number of non-leaf should be :
1061
- // 2^(log2(n) - 1) - 1
1062
- // = 2^log2(n) * 2^-1 - 1
1063
- // = n / 2 - 1.
1064
- // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1065
- // number of comparisons in a simple closed form :
1066
- // n + n / 2 - 1 = n * 3 / 2 - 1
1067
- if (NumCaseCluster <= 3 ) {
1068
- // Suppose a comparison includes one compare and one conditional branch.
1069
- Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1070
- return false ;
1071
- }
1072
- int64_t ExpectedNumberOfCompare = 3 * (uint64_t )NumCaseCluster / 2 - 1 ;
1073
- uint64_t SwitchCost =
1074
- ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1075
- Cost = std::min ((uint64_t )INT_MAX, SwitchCost + Cost);
1038
+ unsigned JumpTableSize = 0 ;
1039
+ unsigned NumCaseCluster =
1040
+ TTI.getEstimatedNumberOfCaseClusters (SI, JumpTableSize);
1041
+
1042
+ // If suitable for a jump table, consider the cost for the table size and
1043
+ // branch to destination.
1044
+ if (JumpTableSize) {
1045
+ int64_t JTCost = (int64_t )JumpTableSize * InlineConstants::InstrCost +
1046
+ 4 * InlineConstants::InstrCost;
1047
+ Cost = std::min ((int64_t )INT_MAX, JTCost + Cost);
1076
1048
return false ;
1077
1049
}
1078
1050
1079
- // Use a simple switch cost model where we accumulate a cost proportional to
1080
- // the number of distinct successor blocks. This fan-out in the CFG cannot
1081
- // be represented for free even if we can represent the core switch as a
1082
- // jumptable that takes a single instruction.
1083
- // /
1084
- // NB: We convert large switches which are just used to initialize large phi
1085
- // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1086
- // inlining those. It will prevent inlining in cases where the optimization
1087
- // does not (yet) fire.
1088
- SmallPtrSet<BasicBlock *, 8 > SuccessorBlocks;
1089
- SuccessorBlocks.insert (SI.getDefaultDest ());
1090
- for (auto Case : SI.cases ())
1091
- SuccessorBlocks.insert (Case.getCaseSuccessor ());
1092
- // Add cost corresponding to the number of distinct destinations. The first
1093
- // we model as free because of fallthrough.
1094
- Cost += (SuccessorBlocks.size () - 1 ) * InlineConstants::InstrCost;
1051
+ // Considering forming a binary search, we should find the number of nodes
1052
+ // which is same as the number of comparisons when lowered. For a given
1053
+ // number of clusters, n, we can define a recursive function, f(n), to find
1054
+ // the number of nodes in the tree. The recursion is :
1055
+ // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1056
+ // and f(n) = n, when n <= 3.
1057
+ // This will lead a binary tree where the leaf should be either f(2) or f(3)
1058
+ // when n > 3. So, the number of comparisons from leaves should be n, while
1059
+ // the number of non-leaf should be :
1060
+ // 2^(log2(n) - 1) - 1
1061
+ // = 2^log2(n) * 2^-1 - 1
1062
+ // = n / 2 - 1.
1063
+ // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1064
+ // number of comparisons in a simple closed form :
1065
+ // n + n / 2 - 1 = n * 3 / 2 - 1
1066
+ if (NumCaseCluster <= 3 ) {
1067
+ // Suppose a comparison includes one compare and one conditional branch.
1068
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1069
+ return false ;
1070
+ }
1071
+ int64_t ExpectedNumberOfCompare = 3 * (uint64_t )NumCaseCluster / 2 - 1 ;
1072
+ uint64_t SwitchCost =
1073
+ ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1074
+ Cost = std::min ((uint64_t )INT_MAX, SwitchCost + Cost);
1095
1075
return false ;
1096
1076
}
1097
1077
0 commit comments