Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -4882,7 +4882,7 @@ /// Create a lookup table to use as a switch replacement with the contents /// of Values, using DefaultValue to fill any holes in the table. SwitchLookupTable( - Module &M, uint64_t TableSize, ConstantInt *Offset, + Module &M, uint64_t TableSize, const SmallVectorImpl> &Values, Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); @@ -4936,7 +4936,7 @@ } // end anonymous namespace SwitchLookupTable::SwitchLookupTable( - Module &M, uint64_t TableSize, ConstantInt *Offset, + Module &M, uint64_t TableSize, const SmallVectorImpl> &Values, Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { assert(Values.size() && "Can't build lookup table without values!"); @@ -4954,7 +4954,7 @@ Constant *CaseRes = Values[I].second; assert(CaseRes->getType() == ValueType); - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + uint64_t Idx = CaseVal->getValue().getLimitedValue(); TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) @@ -5293,9 +5293,9 @@ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { ConstantInt *CaseVal = CI->getCaseValue(); - if (CaseVal->getValue().slt(MinCaseVal->getValue())) + if (CaseVal->getValue().ult(MinCaseVal->getValue())) MinCaseVal = CaseVal; - if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + if (CaseVal->getValue().ugt(MaxCaseVal->getValue())) MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. @@ -5321,8 +5321,7 @@ } uint64_t NumResults = ResultLists[PHIs[0]].size(); - APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + uint64_t TableSize = MaxCaseVal->getValue().getLimitedValue() + 1; bool TableHasHoles = (NumResults < TableSize); // If the table has holes, we need a constant result for the default case @@ -5357,12 +5356,7 @@ // Compute the table index value. Builder.SetInsertPoint(SI); - Value *TableIndex; - if (MinCaseVal->isNullValue()) - TableIndex = SI->getCondition(); - else - TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, - "switch.tableidx"); + Value *TableIndex = SI->getCondition(); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -5402,6 +5396,9 @@ LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); + // When doing the register-sized hole-check, unconditionally use a subtraction. + TableIndex = Builder.CreateSub(TableIndex, MinCaseVal); + // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid // unnecessary illegal types. uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); @@ -5445,7 +5442,7 @@ // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; StringRef FuncName = Fn->getName(); - SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, + SwitchLookupTable Table(Mod, TableSize, ResultList, DV, DL, FuncName); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -5491,17 +5488,6 @@ return true; } -static bool isSwitchDense(ArrayRef Values) { - // See also SelectionDAGBuilder::isDense(), which this function was based on. - uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); - uint64_t Range = Diff + 1; - uint64_t NumCases = Values.size(); - // 40% is the default density for building a jump table in optsize/minsize mode. - uint64_t MinDensity = 40; - - return NumCases * 100 >= Range * MinDensity; -} - /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// @@ -5513,32 +5499,46 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { + // The number of cases that need to be removed by a subtraction operation + // to make it worth using. + const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8); auto *CondTy = cast(SI->getCondition()->getType()); - if (CondTy->getIntegerBitWidth() > 64 || - !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + unsigned BitWidth = CondTy->getIntegerBitWidth(); + if (BitWidth > 64 || + !DL.fitsInLegalInteger(BitWidth)) return false; - // Only bother with this optimization if there are more than 3 switch cases; - // SDAG will only bother creating jump tables for 4 or more cases. - if (SI->getNumCases() < 4) + + // Ignore switches with less than three cases. These will always be lowered + // into a simple comparison. + if (SI->getNumCases() < 3) return false; - // This transform is agnostic to the signedness of the input or case values. We - // can treat the case values as signed or unsigned. We can optimize more common - // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values - // as signed. - SmallVector Values; + // We organize the range to start from 0, if it is not already close. + SmallVector Values; for (auto &C : SI->cases()) - Values.push_back(C.getCaseValue()->getValue().getSExtValue()); + Values.push_back(C.getCaseValue()->getValue().getLimitedValue()); llvm::sort(Values); - // If the switch is already dense, there's nothing useful to do here. - if (isSwitchDense(Values)) - return false; - - // First, transform the values such that they start at zero and ascend. - int64_t Base = Values[0]; - for (auto &V : Values) - V -= (uint64_t)(Base); + bool MadeChanges = false; + // We must first look find the best start point. + uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - + Values.back() + Values.front() + 1; + unsigned BestIndex = 0; + for (unsigned i = 1;i != Values.size();i++) { + if (Values[i] - Values[i-1] > BestDistance) { + BestIndex = i; + BestDistance = Values[i] - Values[i-1]; + } + } + uint64_t Base = 0; + // Now transform the values such that they start at zero and ascend. + if ((BestDistance > SubThreshold) && + (BestIndex != 0 || (Values[0] >= SubThreshold))) { + Base = Values[BestIndex]; + MadeChanges = true; + for (auto &V : Values) + V = (APInt(BitWidth, V) - Base).getLimitedValue(); + } // This transform can be done speculatively because it is so cheap - it results // in a single rotate operation being inserted. @@ -5551,7 +5551,7 @@ // There is no edge condition when the BitWidth is less than 64, because if // 0 is the only value then a shift does nothing, and LLVM requires // well-formed IR to not have duplicate cases. - unsigned TZ = countTrailingZeros((uint64_t)V); + unsigned TZ = countTrailingZeros(V); if (TZ < Shift) { Shift = TZ; if (Shift == 0) @@ -5559,12 +5559,13 @@ } } if (Shift) { + MadeChanges = true; for (auto &V : Values) V = V >> Shift; } - if (!isSwitchDense(Values)) - // Transform didn't create a dense switch. + if (!MadeChanges) + // We didn't do anything. return false; // The obvious transform is to shift the switch condition right and emit a @@ -5630,6 +5631,9 @@ if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + // The conversion from switch to lookup tables results in difficult-to-analyze // code and makes pruning branches much harder. This is a problem if the // switch expression itself can still be restricted as a result of inlining or @@ -5639,9 +5643,6 @@ SwitchToLookupTable(SI, Builder, DL, TTI)) return requestResimplify(); - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - return false; } Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -79,13 +79,14 @@ ret i128 99783 } -; Optimization shouldn't trigger; no holes present define i32 @test3(i32 %a) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: switch i32 [[A:%.*]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 97, label [[ONE:%.*]] -; CHECK-NEXT: i32 98, label [[TWO:%.*]] -; CHECK-NEXT: i32 99, label [[THREE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP1]], i32 0) +; CHECK-NEXT: switch i32 [[TMP2]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 0, label [[ONE:%.*]] +; CHECK-NEXT: i32 1, label [[TWO:%.*]] +; CHECK-NEXT: i32 2, label [[THREE:%.*]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] @@ -114,14 +115,15 @@ ret i32 99783 } -; Optimization shouldn't trigger; not an arithmetic progression define i32 @test4(i32 %a) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: switch i32 [[A:%.*]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 97, label [[ONE:%.*]] -; CHECK-NEXT: i32 102, label [[TWO:%.*]] -; CHECK-NEXT: i32 105, label [[THREE:%.*]] -; CHECK-NEXT: i32 109, label [[THREE]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP1]], i32 0) +; CHECK-NEXT: switch i32 [[TMP2]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 0, label [[ONE:%.*]] +; CHECK-NEXT: i32 5, label [[TWO:%.*]] +; CHECK-NEXT: i32 8, label [[THREE:%.*]] +; CHECK-NEXT: i32 12, label [[THREE]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] @@ -151,14 +153,15 @@ ret i32 99783 } -; Optimization shouldn't trigger; not a power of two define i32 @test5(i32 %a) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: switch i32 [[A:%.*]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 97, label [[ONE:%.*]] -; CHECK-NEXT: i32 102, label [[TWO:%.*]] -; CHECK-NEXT: i32 107, label [[THREE:%.*]] -; CHECK-NEXT: i32 112, label [[THREE]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP1]], i32 0) +; CHECK-NEXT: switch i32 [[TMP2]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 0, label [[ONE:%.*]] +; CHECK-NEXT: i32 5, label [[TWO:%.*]] +; CHECK-NEXT: i32 10, label [[THREE:%.*]] +; CHECK-NEXT: i32 15, label [[THREE]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] @@ -299,13 +302,13 @@ define i32 @test9(i32 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 6 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP1]], i32 [[TMP1]], i32 1) ; CHECK-NEXT: switch i32 [[TMP2]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 6, label [[ONE:%.*]] -; CHECK-NEXT: i32 7, label [[TWO:%.*]] -; CHECK-NEXT: i32 0, label [[THREE:%.*]] -; CHECK-NEXT: i32 2, label [[THREE]] +; CHECK-NEXT: i32 9, label [[ONE:%.*]] +; CHECK-NEXT: i32 10, label [[TWO:%.*]] +; CHECK-NEXT: i32 3, label [[THREE:%.*]] +; CHECK-NEXT: i32 5, label [[THREE]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] Index: test/Transforms/SimplifyCFG/switch-simplify-range.ll =================================================================== --- test/Transforms/SimplifyCFG/switch-simplify-range.ll +++ test/Transforms/SimplifyCFG/switch-simplify-range.ll @@ -8,26 +8,16 @@ define i64 @switch_common_right_bits(i8 %a) #0 { ; CHECK-LABEL: @switch_common_right_bits( ; CHECK-NEXT: Entry: -; CHECK-NEXT: switch i8 [[A:%.*]], label [[SWITCHELSE:%.*]] [ -; CHECK-NEXT: i8 123, label [[SWITCHPRONG:%.*]] -; CHECK-NEXT: i8 125, label [[SWITCHPRONG1:%.*]] -; CHECK-NEXT: i8 127, label [[SWITCHPRONG2:%.*]] -; CHECK-NEXT: i8 -127, label [[SWITCHPRONG3:%.*]] -; CHECK-NEXT: i8 -125, label [[SWITCHPRONG4:%.*]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[A:%.*]], 123 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP0]], i8 [[TMP0]], i8 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 5 +; CHECK-NEXT: br i1 [[TMP2]], label [[SWITCH_LOOKUP:%.*]], label [[SWITCHELSE:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* @switch.table.switch_common_right_bits, i32 0, i8 [[TMP1]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]] +; CHECK-NEXT: ret i64 [[SWITCH_LOAD]] ; CHECK: SwitchElse: -; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ 10, [[ENTRY:%.*]] ], [ 6, [[SWITCHPRONG]] ], [ 3, [[SWITCHPRONG1]] ], [ 4, [[SWITCHPRONG2]] ], [ 2, [[SWITCHPRONG3]] ], [ 1, [[SWITCHPRONG4]] ] -; CHECK-NEXT: ret i64 [[MERGE]] -; CHECK: SwitchProng: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng1: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng2: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng3: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng4: -; CHECK-NEXT: br label [[SWITCHELSE]] +; CHECK-NEXT: ret i64 10 ; Entry: switch i8 %a, label %SwitchElse [ @@ -166,11 +156,10 @@ define i16 @switch_not_normalized_to_start_at_zero(i8 %a) #0 { ; CHECK-LABEL: @switch_not_normalized_to_start_at_zero( ; CHECK-NEXT: Entry: -; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i8 [[A:%.*]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[A:%.*]], 6 ; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SWITCHELSE:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i16], [5 x i16]* @switch.table.switch_not_normalized_to_start_at_zero, i32 0, i8 [[SWITCH_TABLEIDX]] +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i16], [6 x i16]* @switch.table.switch_not_normalized_to_start_at_zero, i32 0, i8 [[A]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i16, i16* [[SWITCH_GEP]] ; CHECK-NEXT: ret i16 [[SWITCH_LOAD]] ; CHECK: SwitchElse: