Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5489,6 +5489,33 @@ return true; } +/// A cheap transform: investigate using ctlz as a key to the switch +/// FIXME: this transform would benifit from proper range analysis in SwitchToLookupTable +static bool ReduceSwitchRangeWithCtlz(SwitchInst &SI, + SmallVectorImpl &Values) { + // There is a big gotcha regarding ctlz/cttz: when given 0, they + // return the bit width. If not handled this can result in the transform + // being applied multiple times. We avoid this with a range check here. + unsigned BitWidth = SI.getCondition()->getType()->getIntegerBitWidth(); + if (Values.back() <= BitWidth) + return false; + + unsigned MaxPopCount = 0; + for (auto &V : Values) { + MaxPopCount = countPopulation(V); + if (MaxPopCount > 1) + break; + } + if (MaxPopCount != 1) + return false; + for (auto &V : Values) + V = APInt(BitWidth, V).countLeadingZeros(); + // 0 will suddenly become the largest (BitWidth), so we need to sort again. + // Ctlz of popcount <= 1 will always reverse the order. + llvm::reverse(Values); + return true; +} + /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// @@ -5516,7 +5543,7 @@ return false; // We organize the range to start from 0, if it is not already close. - SmallVector Values; + SmallVector Values; for (auto &C : SI->cases()) Values.push_back(C.getCaseValue()->getValue().getLimitedValue()); llvm::sort(Values); @@ -5524,6 +5551,11 @@ bool MadeChanges = false; // We must first look find the best start point, for example if we have a series // that crosses zero: -2, -1, 0, 1, 2. + + bool UseCtlz = ReduceSwitchRangeWithCtlz(*SI, Values); + if (UseCtlz) + MadeChanges = true; + uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - Values.back() + Values.front() + 1; unsigned BestIndex = 0; @@ -5548,9 +5580,6 @@ // This transform can be done speculatively because it is so cheap - it results // in a single rotate operation being inserted. - // FIXME: It's possible that optimizing a switch on powers of two might also - // be beneficial - flag values are often powers of two and we could use a CLZ - // as the key function. // Cttz often has an edge condition on 0 which means that the bit-width // is important, however here there is no such edge condition because if @@ -5583,10 +5612,14 @@ auto *Ty = cast(SI->getCondition()->getType()); Builder.SetInsertPoint(SI); Value *Key = SI->getCondition(); - if (Base > 0) { - Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base), "switch.rangereduce"); + if (UseCtlz) { + auto *ZeroNotUndef = Builder.getInt1(false); + Function *Ctlz = Intrinsic::getDeclaration(SI->getModule(), Intrinsic::ctlz, Ty); + Key = Builder.CreateCall(Ctlz, {Key, ZeroNotUndef}, "switch.rangereduce"); } - if (Shift > 0) { + if (Base > 0) + Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base), "switch.rangereduce"); + if (Shift) { auto *ShiftC = ConstantInt::get(Ty, Shift); Function *Fshr = Intrinsic::getDeclaration(SI->getModule(), Intrinsic::fshr, Ty); Key = Builder.CreateCall(Fshr, {Key, Key, ShiftC}, "switch.rangereduce"); @@ -5595,7 +5628,12 @@ for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + uint64_t Zeros; + if (UseCtlz) + Zeros = Orig->getValue().countLeadingZeros(); + else + Zeros = Orig->getValue().getLimitedValue(); + auto Sub = (APInt(BitWidth, Zeros) - Base); Case.setValue( cast(ConstantInt::get(Ty, Sub.lshr(Shift)))); } Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -79,7 +79,7 @@ ret i128 99783 } -; Optimization shouldn't trigger; too few cases +; Optimization shouldn't trigger; no holes present define i32 @test3(i32 %a) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: switch i32 [[A:%.*]], label [[DEF:%.*]] [ @@ -114,6 +114,7 @@ ret i32 99783 } +; Optimization shouldn't trigger; not an arithmetic progression define i32 @test4(i32 %a) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 97 @@ -151,6 +152,7 @@ ret i32 99783 } +; Optimization shouldn't trigger; not a power of two define i32 @test5(i32 %a) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 97 Index: test/Transforms/SimplifyCFG/switch-simplify-range.ll =================================================================== --- test/Transforms/SimplifyCFG/switch-simplify-range.ll +++ test/Transforms/SimplifyCFG/switch-simplify-range.ll @@ -44,35 +44,16 @@ define i64 @switch_clz1(i16 %a) optsize #0 { ; CHECK-LABEL: @switch_clz1( ; CHECK-NEXT: Entry: -; CHECK-NEXT: switch i16 [[A:%.*]], label [[SWITCHELSE:%.*]] [ -; CHECK-NEXT: i16 2, label [[SWITCHPRONG:%.*]] -; CHECK-NEXT: i16 4, label [[SWITCHPRONG1:%.*]] -; CHECK-NEXT: i16 8, label [[SWITCHPRONG2:%.*]] -; CHECK-NEXT: i16 1, label [[SWITCHPRONG3:%.*]] -; CHECK-NEXT: i16 64, label [[SWITCHPRONG6:%.*]] -; CHECK-NEXT: i16 128, label [[SWITCHPRONG7:%.*]] -; CHECK-NEXT: i16 16, label [[SWITCHPRONG5:%.*]] -; CHECK-NEXT: i16 32, label [[SWITCHPRONG4:%.*]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[A:%.*]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i16 [[TMP1]], 8 +; CHECK-NEXT: br i1 [[TMP2]], label [[SWITCH_LOOKUP:%.*]], label [[SWITCHELSE:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.switch_clz1, i32 0, i16 [[TMP1]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]] +; CHECK-NEXT: ret i64 [[SWITCH_LOAD]] ; CHECK: SwitchElse: -; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ 10, [[ENTRY:%.*]] ], [ 6, [[SWITCHPRONG]] ], [ 3, [[SWITCHPRONG1]] ], [ 35, [[SWITCHPRONG2]] ], [ 31, [[SWITCHPRONG3]] ], [ 53, [[SWITCHPRONG4]] ], [ 51, [[SWITCHPRONG5]] ], [ 41, [[SWITCHPRONG6]] ], [ 34, [[SWITCHPRONG7]] ] -; CHECK-NEXT: ret i64 [[MERGE]] -; CHECK: SwitchProng: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng1: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng2: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng3: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng4: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng5: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng6: -; CHECK-NEXT: br label [[SWITCHELSE]] -; CHECK: SwitchProng7: -; CHECK-NEXT: br label [[SWITCHELSE]] +; CHECK-NEXT: ret i64 10 ; Entry: switch i16 %a, label %SwitchElse [ @@ -108,26 +89,15 @@ define i64 @switch_clz2(i8 %a) optsize #0 { ; CHECK-LABEL: @switch_clz2( ; CHECK-NEXT: Entry: -; CHECK-NEXT: switch i8 [[A:%.*]], label [[SWITCHELSE:%.*]] [ -; CHECK-NEXT: i8 -128, label [[SWITCHPRONG2:%.*]] -; CHECK-NEXT: i8 64, label [[SWITCHPRONG3:%.*]] -; CHECK-NEXT: i8 32, label [[SWITCHPRONG6:%.*]] -; CHECK-NEXT: i8 1, label [[SWITCHPRONG7:%.*]] -; CHECK-NEXT: i8 0, label [[SWITCHPRONG5:%.*]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[A:%.*]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 9 +; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[SWITCHPRONG2:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* @switch.table.switch_clz2, i32 0, i8 [[TMP0]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]] +; CHECK-NEXT: ret i64 [[SWITCH_LOAD]] ; CHECK: SwitchProng2: -; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ 35, [[ENTRY:%.*]] ], [ 31, [[SWITCHPRONG3]] ], [ 41, [[SWITCHPRONG6]] ], [ 40, [[SWITCHPRONG5]] ], [ 43, [[SWITCHPRONG7]] ], [ 12, [[SWITCHELSE]] ] -; CHECK-NEXT: ret i64 [[MERGE]] -; CHECK: SwitchProng3: -; CHECK-NEXT: br label [[SWITCHPRONG2]] -; CHECK: SwitchProng6: -; CHECK-NEXT: br label [[SWITCHPRONG2]] -; CHECK: SwitchProng5: -; CHECK-NEXT: br label [[SWITCHPRONG2]] -; CHECK: SwitchProng7: -; CHECK-NEXT: br label [[SWITCHPRONG2]] -; CHECK: SwitchElse: -; CHECK-NEXT: br label [[SWITCHPRONG2]] +; CHECK-NEXT: ret i64 12 ; Entry: switch i8 %a, label %SwitchElse [