Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -575,6 +575,13 @@ /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + /// Given a table size (including holes), number of elements (not including + /// holes), byte width of cases, and if this function is being compiled opt + /// size, return true if this switch should be turned in to a lookup table for + /// this target. + bool shouldBuildLookupTable(size_t TableSize, uint32_t NumCases, + unsigned CaseSize, bool HasOptSize) const; + /// Return true if the input function which is cold at all call sites, /// should use coldcc calling convention. bool useColdCCForColdCall(Function &F) const; @@ -1106,6 +1113,8 @@ virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual bool shouldBuildLookupTable(uint64_t TableSize, uint32_t NumCases, + unsigned CaseSize, bool HasOptSize) = 0; virtual bool useColdCCForColdCall(Function &F) = 0; virtual unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0; @@ -1382,6 +1391,11 @@ bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + bool shouldBuildLookupTable(uint64_t TableSize, uint32_t NumCases, + unsigned CaseSize, bool HasOptSize) override { + return Impl.shouldBuildLookupTable(TableSize, NumCases, CaseSize, + HasOptSize); + } bool useColdCCForColdCall(Function &F) override { return Impl.useColdCCForColdCall(F); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -261,6 +261,28 @@ bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + bool shouldBuildLookupTable(uint64_t TableSize, uint32_t NumCases, + unsigned CaseSize, bool HasOptSize) { + assert(TableSize >= NumCases); + // This is a limitation of the sparse maps implementation + if (TableSize >= (1ULL << 38)) + return false; + + // This is a guess of whether the table will be smaller. + // Tables have O(1) performance, compared to the O(log n) performance of + // the binary search that switches get lowered into, so we prefer them when + // they are smaller. + if (TableSize * CaseSize + 16 < (uint64_t)NumCases * 16) + return true; + + // Space is more important than performance when using -Os + if (HasOptSize) + return false; + + // The table density should be at least 1/3rd (33%) for 64-bit integers. + // This is a guess. FIXME: Find the best cut-off. + return (uint64_t)NumCases * 3 * (64 / 8) >= TableSize * CaseSize; + } bool useColdCCForColdCall(Function &F) { return false; } Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -249,6 +249,13 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const { return TTIImpl->shouldBuildLookupTablesForConstant(C); } +bool TargetTransformInfo::shouldBuildLookupTable(uint64_t TableSize, + uint32_t NumCases, + unsigned CaseSize, + bool HasOptSize) const { + return TTIImpl->shouldBuildLookupTable(TableSize, NumCases, CaseSize, + HasOptSize); +} bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { return TTIImpl->useColdCCForColdCall(F); Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5128,11 +5128,19 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap &ResultTypes) { - if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) - return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() > TableSize) + return false; // TableSize overflowed + + // If the table only contains i8 or smaller condition, it has a bounded size + // of 256 times the largest legal size, and will generally be more performant + // with a lookup table. + if (!SI->getFunction()->hasOptSize() && + (DL.getTypeAllocSize(SI->getCondition()->getType()) * 8) <= 8) + return true; bool AllTablesFitInRegister = true; bool HasIllegalType = false; + unsigned LargestTypeSize = 0; for (const auto &I : ResultTypes) { Type *Ty = I.second; @@ -5144,9 +5152,10 @@ AllTablesFitInRegister && SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); - // If both flags saturate, we're done. NOTE: This *only* works with - // saturating flags, and all flags have to saturate first due to the - // non-deterministic behavior of iterating over a dense map. + LargestTypeSize = + std::max((uint32_t)DL.getTypeAllocSize(Ty), LargestTypeSize); + + // If both flags saturate, we're done. if (HasIllegalType && !AllTablesFitInRegister) break; } @@ -5155,14 +5164,9 @@ if (AllTablesFitInRegister) return true; - // Don't build a table that doesn't fit in-register if it has illegal types. - if (HasIllegalType) - return false; - - // The table density should be at least 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - return SI->getNumCases() * 10 >= TableSize * 4; + return TTI.shouldBuildLookupTable(TableSize, SI->getNumCases(), + LargestTypeSize, + SI->getFunction()->hasOptSize()); } /// Try to reuse the switch table index compare. Following pattern: @@ -5248,6 +5252,127 @@ } } +/// Try to transform a switch that has "holes" in it to a contiguous sequence +/// of cases. +/// +/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +/// +/// This converts a sparse switch into a dense switch which allows better +/// lowering and could also allow transforming into a lookup table. +static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + // The number of cases that need to be removed by a subtraction operation + // to make it worth using. + const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8); + auto *CondTy = cast(SI->getCondition()->getType()); + unsigned BitWidth = CondTy->getIntegerBitWidth(); + if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth)) + return false; + // Only bother with this optimization if there are more than 3 switch cases; + // SDAG will only bother creating jump tables for 4 or more cases. + // This is also useful when using the LowerSwitch transform, but not with + // so few cases. + if (SI->getNumCases() < 4) + return false; + + // We organize the range to start from 0, if it is not already close. + SmallVector Values; + for (auto &C : SI->cases()) + Values.push_back(C.getCaseValue()->getValue().getLimitedValue()); + llvm::sort(Values); + + bool MadeChanges = false; + + // We must first look find the best start point, for example if we have a + // series that crosses zero: -2, -1, 0, 1, 2. + uint64_t BestDistance = + APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - + Values.back() + Values.front() + 1; + unsigned BestIndex = 0; + for (unsigned I = 1, E = Values.size(); I != E; I++) { + if (Values[I] - Values[I - 1] > BestDistance) { + BestIndex = I; + BestDistance = Values[I] - Values[I - 1]; + } + } + + // This transform can be done speculatively because it is so cheap - it + // results in a single rotate operation being inserted. + + // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than + // one element and LLVM disallows duplicate cases, Shift is guaranteed to be + // less than 64. + unsigned Shift = 64; + // We need to store this from _before_ the transform + uint64_t BestIndexXor = Values[BestIndex]; + for (auto &V : Values) + Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); + assert(Shift < 64); + if (Shift > 0) { + MadeChanges = true; + for (auto &V : Values) + V >>= Shift; + } + + // We Xor against Values[] (any element will do) because the if we do not + // start at zero, but also don't meet the SubThreshold, then we still might + // share common rights bits, and if this transform succeeds + // then we should insert the subtraction anyways, because the rotate trick + // below to avoid a branch needs the shifted away bits to be zero. + + // Now transform the values such that they start at zero and ascend. Do not + // do this if the shift reduces the lowest value to less than SubThreshold, + // or if the subtraction is less than SubThreshold and it does not enable a + // rotate. + uint64_t Base = 0; + if ((BestIndexXor >= SubThreshold && Shift == 0) || + (Shift > countTrailingZeros(BestIndexXor) && + Values[BestIndex] >= SubThreshold)) { + Base = BestIndexXor; + MadeChanges = true; + for (auto &V : Values) + V = (APInt(BitWidth, V) - Base).getLimitedValue(); + } + + if (!MadeChanges) + // We didn't do anything. + return false; + + // The obvious transform is to shift the switch condition right and emit a + // check that the condition actually cleanly divided by GCD, i.e. + // C & (1 << Shift - 1) == 0 + // inserting a new CFG edge to handle the case where it didn't divide cleanly. + // + // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the + // shift and puts the shifted-off bits in the uppermost bits. If any of these + // are nonzero then the switch condition will be very large and will hit the + // default case. + + auto *Ty = cast(SI->getCondition()->getType()); + Builder.SetInsertPoint(SI); + Value *Key = SI->getCondition(); + if (Base > 0) + Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base), + "switch.rangereduce"); + if (Shift > 0) { + // FIXME replace with fshr? + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *LShr = Builder.CreateLShr(Key, ShiftC); + auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift); + Key = Builder.CreateOr(LShr, Shl); + } + SI->replaceUsesOfWith(SI->getCondition(), Key); + + for (auto Case : SI->cases()) { + auto *Orig = Case.getCaseValue(); + auto Sub = Orig->getValue() - Base; + Case.setValue(cast(ConstantInt::get(Ty, Sub.lshr(Shift)))); + } + return true; +} + /// If the switch is only used to initialize one or more phi nodes in a common /// successor block with different constant values, replace the switch with /// lookup tables. @@ -5346,6 +5471,23 @@ DefaultResults[PHI] = Result; } + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If the table is only a u8 and we do not have to check for the default case, + // extend the table so we can get rid of the branch. + bool ExtendToCoveredTable = MaxTableSize <= 256 && HasDefaultResults && + !SI->getFunction()->hasOptSize(); + if (ExtendToCoveredTable) + TableSize = MaxTableSize; + else if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return true; // We will get called again + if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; @@ -5354,18 +5496,9 @@ BasicBlock *LookupBB = BasicBlock::Create( Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); - // Compute the table index value. Builder.SetInsertPoint(SI); Value *TableIndex = SI->getCondition(); - // Compute the maximum table size representable by the integer type we are - // switching upon. - unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); - uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize; - assert(MaxTableSize >= TableSize && - "It is impossible for a switch to have more entries than the max " - "representable value of its input integer type's size."); - // If the default destination is unreachable, or if the lookup table covers // all values of the conditional variable, branch directly to the lookup table // BB. Otherwise, check that the condition is within the case range. @@ -5492,128 +5625,6 @@ return true; } -/// Try to transform a switch that has "holes" in it to a contiguous sequence -/// of cases. -/// -/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be -/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. -/// -/// This converts a sparse switch into a dense switch which allows better -/// lowering and could also allow transforming into a lookup table. -static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout &DL, - const TargetTransformInfo &TTI) { - // The number of cases that need to be removed by a subtraction operation - // to make it worth using. - const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8); - auto *CondTy = cast(SI->getCondition()->getType()); - unsigned BitWidth = CondTy->getIntegerBitWidth(); - if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth)) - return false; - // Only bother with this optimization if there are more than 3 switch cases; - // SDAG will only bother creating jump tables for 4 or more cases. - // This is also useful when using the LowerSwitch transform, but not with - // so few cases. - if (SI->getNumCases() < 4) - return false; - - // We organize the range to start from 0, if it is not already close. - SmallVector Values; - for (auto &C : SI->cases()) - Values.push_back(C.getCaseValue()->getValue().getLimitedValue()); - llvm::sort(Values); - - bool MadeChanges = false; - // We must first look find the best start point, for example if we have a - // series that crosses zero: -2, -1, 0, 1, 2. - uint64_t BestDistance = - APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - - Values.back() + Values.front() + 1; - unsigned BestIndex = 0; - for (unsigned I = 1, E = Values.size(); I != E; I++) { - if (Values[I] - Values[I - 1] > BestDistance) { - BestIndex = I; - BestDistance = Values[I] - Values[I - 1]; - } - } - - // This transform can be done speculatively because it is so cheap - it - // results in a single rotate operation being inserted. - // FIXME: It's possible that optimizing a switch on powers of two might also - // be beneficial - flag values are often powers of two and we could use a CLZ - // as the key function. - - // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than - // one element and LLVM disallows duplicate cases, Shift is guaranteed to be - // less than 64. - unsigned Shift = 64; - // We need to store this from _before_ the transform - uint64_t BestIndexXor = Values[BestIndex]; - for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); - assert(Shift < 64); - if (Shift > 0) { - MadeChanges = true; - for (auto &V : Values) - V >>= Shift; - } - - // We Xor against Values[] (any element will do) because the if we do not - // start at zero, but also don't meet the SubThreshold, then we still might - // share common rights bits, and if this transform succeeds - // then we should insert the subtraction anyways, because the rotate trick - // below to avoid a branch needs the shifted away bits to be zero. - - // Now transform the values such that they start at zero and ascend. Do not - // do this if the shift reduces the lowest value to less than SubThreshold, - // or if the subtraction is less than SubThreshold and it does not enable a - // rotate. - uint64_t Base = 0; - if ((BestIndexXor >= SubThreshold && Shift == 0) || - (Shift > countTrailingZeros(BestIndexXor) && - Values[BestIndex] >= SubThreshold)) { - Base = BestIndexXor; - MadeChanges = true; - for (auto &V : Values) - V = (APInt(BitWidth, V) - Base).getLimitedValue(); - } - - if (!MadeChanges) - // We didn't do anything. - return false; - - // The obvious transform is to shift the switch condition right and emit a - // check that the condition actually cleanly divided by GCD, i.e. - // C & (1 << Shift - 1) == 0 - // inserting a new CFG edge to handle the case where it didn't divide cleanly. - // - // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the - // shift and puts the shifted-off bits in the uppermost bits. If any of these - // are nonzero then the switch condition will be very large and will hit the - // default case. - - auto *Ty = cast(SI->getCondition()->getType()); - Builder.SetInsertPoint(SI); - Value *Key = SI->getCondition(); - if (Base > 0) - Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base)); - if (Shift > 0) { - // FIXME replace with fshr? - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *LShr = Builder.CreateLShr(Key, ShiftC); - auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift); - Key = Builder.CreateOr(LShr, Shl); - } - SI->replaceUsesOfWith(SI->getCondition(), Key); - - for (auto Case : SI->cases()) { - auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - Base; - Case.setValue(cast(ConstantInt::get(Ty, Sub.lshr(Shift)))); - } - return true; -} - bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -5650,9 +5661,6 @@ if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) return requestResimplify(); - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - // The conversion from switch to lookup tables results in difficult-to-analyze // code and makes pruning branches much harder. This is a problem if the // switch expression itself can still be restricted as a result of inlining or @@ -5662,6 +5670,11 @@ SwitchToLookupTable(SI, Builder, DL, TTI)) return requestResimplify(); + // Call ReduceSwitchRange *after* SwitchToLookupTable as SwitchToLookupTable + // calls this internally. + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + return false; } Index: test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll +++ test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll @@ -12,8 +12,8 @@ define i32 @foo(i32 %c) "no-jump-tables"="true" { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 42 +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE]], label [[SW_DEFAULT:%.*]] [ ; CHECK-NEXT: i32 0, label [[RETURN:%.*]] ; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] ; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] @@ -52,11 +52,11 @@ define i32 @bar(i32 %c) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[SWITCH_RANGEREDUCE]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] ; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: return: Index: test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll +++ test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll @@ -9,16 +9,10 @@ define i64 @test(i3 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[ARG:%.*]], -1 -; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]] -; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[ARG]] to i4 -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]] +; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[ARG:%.*]] to i4 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]] -; CHECK-NEXT: br label [[DEFAULT]] -; CHECK: Default: -; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ] -; CHECK-NEXT: [[V3:%.*]] = add i64 [[V1]], 0 +; CHECK-NEXT: [[V3:%.*]] = add i64 [[SWITCH_LOAD]], 0 ; CHECK-NEXT: ret i64 [[V3]] ; entry: Index: test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -36,11 +36,11 @@ define i32 @f(i32 %c) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 7 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 [[SWITCH_RANGEREDUCE]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] ; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: return: @@ -75,11 +75,11 @@ define i8 @char(i32 %c) { ; CHECK-LABEL: @char( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 9 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 9 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 [[SWITCH_RANGEREDUCE]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i8, i8* [[SWITCH_GEP]] ; CHECK-NEXT: ret i8 [[SWITCH_LOAD]] ; CHECK: return: @@ -243,20 +243,15 @@ ; CHECK-LABEL: @crud( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33 -; CHECK-NEXT: br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]] -; CHECK: switch.early.test: -; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[C]], 34 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 59 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[LOR_END]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_LOOKUP:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[TMP0]] to i59 -; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul i59 [[SWITCH_CAST]], 1 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i59 -288230375765830623, [[SWITCH_SHIFTAMT]] -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i59 [[SWITCH_DOWNSHIFT]] to i1 +; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i8 [[C]] to i9 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [256 x i1], [256 x i1]* @switch.table.crud, i32 0, i9 [[SWITCH_TABLEIDX_ZEXT]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i1, i1* [[SWITCH_GEP]] ; CHECK-NEXT: br label [[LOR_END]] ; CHECK: lor.end: -; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ false, [[SWITCH_EARLY_TEST]] ] -; CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ] +; CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32 ; CHECK-NEXT: ret i32 [[LOR_EXT]] ; entry: @@ -300,8 +295,8 @@ define i32 @overflow(i32 %type) { ; CHECK-LABEL: @overflow( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[TYPE:%.*]], -2147483645 -; CHECK-NEXT: switch i32 [[TMP0]], label [[IF_END:%.*]] [ +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[TYPE:%.*]], -2147483645 +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE]], label [[IF_END:%.*]] [ ; CHECK-NEXT: i32 -2147483648, label [[SW_BB3:%.*]] ; CHECK-NEXT: i32 0, label [[SW_BB3]] ; CHECK-NEXT: i32 2147483646, label [[SW_BB1:%.*]] @@ -923,27 +918,15 @@ define i96 @illegaltype(i32 %c) { ; CHECK-LABEL: @illegaltype( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[RETURN:%.*]] -; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] -; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] -; CHECK-NEXT: i32 3, label [[SW_BB3:%.*]] -; CHECK-NEXT: i32 4, label [[SW_BB4:%.*]] -; CHECK-NEXT: ] -; CHECK: sw.bb1: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.bb2: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.bb3: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.bb4: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.default: -; CHECK-NEXT: br label [[RETURN]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 5 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i96], [5 x i96]* @switch.table.illegaltype, i32 0, i32 [[SWITCH_RANGEREDUCE]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i96, i96* [[SWITCH_GEP]] +; CHECK-NEXT: ret i96 [[SWITCH_LOAD]] ; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i96 [ 15, [[SW_DEFAULT]] ], [ 27, [[SW_BB4]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i96 [[RETVAL_0]] +; CHECK-NEXT: ret i96 15 ; entry: switch i32 %c, label %sw.default [ @@ -1214,11 +1197,11 @@ define i8 @linearmap1(i32 %c) { ; CHECK-LABEL: @linearmap1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 10 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8 +; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_RANGEREDUCE]] to i8 ; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], -5 ; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add i8 [[SWITCH_IDX_MULT]], 18 ; CHECK-NEXT: ret i8 [[SWITCH_OFFSET]] @@ -1245,15 +1228,10 @@ define i32 @linearmap2(i8 %c) { ; CHECK-LABEL: @linearmap2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[C:%.*]], -13 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] -; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i8 [[TMP0]] to i32 -; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add i32 [[SWITCH_IDX_CAST]], 18 -; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]] -; CHECK: return: -; CHECK-NEXT: ret i32 3 +; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i8 [[C:%.*]] to i9 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* @switch.table.linearmap2, i32 0, i9 [[SWITCH_TABLEIDX_ZEXT]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; entry: switch i8 %c, label %sw.default [ @@ -1275,11 +1253,11 @@ define i8 @linearmap3(i32 %c) { ; CHECK-LABEL: @linearmap3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 10 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], 10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8 +; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_RANGEREDUCE]] to i8 ; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], 100 ; CHECK-NEXT: ret i8 [[SWITCH_IDX_MULT]] ; CHECK: return: @@ -1305,11 +1283,11 @@ define i8 @linearmap4(i32 %c) { ; CHECK-LABEL: @linearmap4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], -2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[C:%.*]], -2 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8 +; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_RANGEREDUCE]] to i8 ; CHECK-NEXT: ret i8 [[SWITCH_IDX_CAST]] ; CHECK: return: ; CHECK-NEXT: ret i8 3 Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -6,25 +6,18 @@ define i32 @test1(i32 %a) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: switch i32 [[TMP4]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 0, label [[ONE:%.*]] -; CHECK-NEXT: i32 1, label [[TWO:%.*]] -; CHECK-NEXT: i32 2, label [[THREE:%.*]] -; CHECK-NEXT: i32 3, label [[THREE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SWITCH_RANGEREDUCE]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[SWITCH_RANGEREDUCE]], 30 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 4 +; CHECK-NEXT: br i1 [[TMP4]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.test1, i32 0, i32 [[TMP3]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: def: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] -; CHECK-NEXT: ret i32 [[MERGE]] -; CHECK: one: -; CHECK-NEXT: br label [[DEF]] -; CHECK: two: -; CHECK-NEXT: br label [[DEF]] -; CHECK: three: -; CHECK-NEXT: br label [[DEF]] +; CHECK-NEXT: ret i32 8867 ; switch i32 %a, label %def [ i32 97, label %one @@ -119,22 +112,15 @@ ; Optimization shouldn't trigger; not an arithmetic progression define i32 @test4(i32 %a) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 -; CHECK-NEXT: switch i32 [[TMP1]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 0, label [[ONE:%.*]] -; CHECK-NEXT: i32 5, label [[TWO:%.*]] -; CHECK-NEXT: i32 8, label [[THREE:%.*]] -; CHECK-NEXT: i32 12, label [[THREE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 13 +; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [13 x i32], [13 x i32]* @switch.table.test4, i32 0, i32 [[SWITCH_RANGEREDUCE]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: def: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] -; CHECK-NEXT: ret i32 [[MERGE]] -; CHECK: one: -; CHECK-NEXT: br label [[DEF]] -; CHECK: two: -; CHECK-NEXT: br label [[DEF]] -; CHECK: three: -; CHECK-NEXT: br label [[DEF]] +; CHECK-NEXT: ret i32 8867 ; switch i32 %a, label %def [ i32 97, label %one @@ -157,22 +143,15 @@ ; Optimization shouldn't trigger; not a power of two define i32 @test5(i32 %a) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 -; CHECK-NEXT: switch i32 [[TMP1]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 0, label [[ONE:%.*]] -; CHECK-NEXT: i32 5, label [[TWO:%.*]] -; CHECK-NEXT: i32 10, label [[THREE:%.*]] -; CHECK-NEXT: i32 15, label [[THREE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SWITCH_RANGEREDUCE]], 16 +; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* @switch.table.test5, i32 0, i32 [[SWITCH_RANGEREDUCE]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: def: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] -; CHECK-NEXT: ret i32 [[MERGE]] -; CHECK: one: -; CHECK-NEXT: br label [[DEF]] -; CHECK: two: -; CHECK-NEXT: br label [[DEF]] -; CHECK: three: -; CHECK-NEXT: br label [[DEF]] +; CHECK-NEXT: ret i32 8867 ; switch i32 %a, label %def [ i32 97, label %one @@ -194,25 +173,18 @@ define i32 @test6(i32 %a) optsize { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], -109 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: switch i32 [[TMP4]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 3, label [[ONE:%.*]] -; CHECK-NEXT: i32 2, label [[TWO:%.*]] -; CHECK-NEXT: i32 1, label [[THREE:%.*]] -; CHECK-NEXT: i32 0, label [[THREE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], -109 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SWITCH_RANGEREDUCE]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[SWITCH_RANGEREDUCE]], 30 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 4 +; CHECK-NEXT: br i1 [[TMP4]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.test6, i32 0, i32 [[TMP3]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: def: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] -; CHECK-NEXT: ret i32 [[MERGE]] -; CHECK: one: -; CHECK-NEXT: br label [[DEF]] -; CHECK: two: -; CHECK-NEXT: br label [[DEF]] -; CHECK: three: -; CHECK-NEXT: br label [[DEF]] +; CHECK-NEXT: ret i32 8867 ; switch i32 %a, label %def [ i32 -97, label %one @@ -237,11 +209,11 @@ ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[A:%.*]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[A]], 6 ; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[TMP3]], 55 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 4 -; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i8 [[TMP3]], 55 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[SWITCH_RANGEREDUCE]], 4 +; CHECK-NEXT: br i1 [[TMP4]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[TMP4]] to i32 +; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i8 [[SWITCH_RANGEREDUCE]] to i32 ; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul i32 [[SWITCH_CAST]], 8 ; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i32 -943228976, [[SWITCH_SHIFTAMT]] ; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i32 [[SWITCH_DOWNSHIFT]] to i8 @@ -269,25 +241,18 @@ define i32 @test8(i32 %a) optsize { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP1]], 30 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: switch i32 [[TMP4]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 0, label [[ONE:%.*]] -; CHECK-NEXT: i32 1, label [[TWO:%.*]] -; CHECK-NEXT: i32 2, label [[THREE:%.*]] -; CHECK-NEXT: i32 4, label [[THREE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 97 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SWITCH_RANGEREDUCE]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[SWITCH_RANGEREDUCE]], 30 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 5 +; CHECK-NEXT: br i1 [[TMP4]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @switch.table.test8, i32 0, i32 [[TMP3]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: def: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] -; CHECK-NEXT: ret i32 [[MERGE]] -; CHECK: one: -; CHECK-NEXT: br label [[DEF]] -; CHECK: two: -; CHECK-NEXT: br label [[DEF]] -; CHECK: three: -; CHECK-NEXT: br label [[DEF]] +; CHECK-NEXT: ret i32 8867 ; switch i32 %a, label %def [ i32 97, label %one @@ -312,21 +277,14 @@ ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[A]], 31 ; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: switch i32 [[TMP3]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 9, label [[ONE:%.*]] -; CHECK-NEXT: i32 10, label [[TWO:%.*]] -; CHECK-NEXT: i32 3, label [[THREE:%.*]] -; CHECK-NEXT: i32 5, label [[THREE]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 11 +; CHECK-NEXT: br i1 [[TMP4]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* @switch.table.test9, i32 0, i32 [[TMP3]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] +; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] ; CHECK: def: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] -; CHECK-NEXT: ret i32 [[MERGE]] -; CHECK: one: -; CHECK-NEXT: br label [[DEF]] -; CHECK: two: -; CHECK-NEXT: br label [[DEF]] -; CHECK: three: -; CHECK-NEXT: br label [[DEF]] +; CHECK-NEXT: ret i32 8867 ; switch i32 %a, label %def [ i32 18, label %one Index: test/Transforms/SimplifyCFG/switch-genfori8.ll =================================================================== --- test/Transforms/SimplifyCFG/switch-genfori8.ll +++ test/Transforms/SimplifyCFG/switch-genfori8.ll @@ -14,60 +14,11 @@ ; Function Attrs: norecurse nounwind readnone uwtable define dso_local zeroext i8 @char_to_digit(i8 zeroext) local_unnamed_addr #0 { ; CHECK-LABEL: @char_to_digit( -; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[TMP0:%.*]], 48 -; CHECK-NEXT: switch i8 [[TMP2]], label [[TMP18:%.*]] [ -; CHECK-NEXT: i8 0, label [[TMP19:%.*]] -; CHECK-NEXT: i8 1, label [[TMP3:%.*]] -; CHECK-NEXT: i8 2, label [[TMP4:%.*]] -; CHECK-NEXT: i8 3, label [[TMP5:%.*]] -; CHECK-NEXT: i8 4, label [[TMP6:%.*]] -; CHECK-NEXT: i8 5, label [[TMP7:%.*]] -; CHECK-NEXT: i8 6, label [[TMP8:%.*]] -; CHECK-NEXT: i8 7, label [[TMP9:%.*]] -; CHECK-NEXT: i8 8, label [[TMP10:%.*]] -; CHECK-NEXT: i8 9, label [[TMP11:%.*]] -; CHECK-NEXT: i8 49, label [[TMP12:%.*]] -; CHECK-NEXT: i8 50, label [[TMP13:%.*]] -; CHECK-NEXT: i8 51, label [[TMP14:%.*]] -; CHECK-NEXT: i8 52, label [[TMP15:%.*]] -; CHECK-NEXT: i8 53, label [[TMP16:%.*]] -; CHECK-NEXT: i8 54, label [[TMP17:%.*]] -; CHECK-NEXT: ] -; CHECK: 3: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 4: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 5: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 6: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 7: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 8: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 9: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 10: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 11: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 12: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 13: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 14: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 15: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 16: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 17: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 18: -; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: -; CHECK-NEXT: [[TMP20:%.*]] = phi i8 [ -1, [[TMP18]] ], [ 15, [[TMP17]] ], [ 14, [[TMP16]] ], [ 13, [[TMP15]] ], [ 12, [[TMP14]] ], [ 11, [[TMP13]] ], [ 10, [[TMP12]] ], [ 9, [[TMP11]] ], [ 8, [[TMP10]] ], [ 7, [[TMP9]] ], [ 6, [[TMP8]] ], [ 5, [[TMP7]] ], [ 4, [[TMP6]] ], [ 3, [[TMP5]] ], [ 2, [[TMP4]] ], [ 1, [[TMP3]] ], [ 0, [[TMP1:%.*]] ] -; CHECK-NEXT: ret i8 [[TMP20]] +; CHECK-NEXT: switch.lookup: +; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i8 [[TMP0:%.*]] to i9 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* @switch.table.char_to_digit, i32 0, i9 [[SWITCH_TABLEIDX_ZEXT]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i8, i8* [[SWITCH_GEP]] +; CHECK-NEXT: ret i8 [[SWITCH_LOAD]] ; switch i8 %0, label %17 [ i8 48, label %18