Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -118,13 +118,14 @@ NoTrapAfterNoreturn(false), EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), - SupportsDefaultOutlining(false), EmitAddrsig(false) {} + SupportsDefaultOutlining(false), EmitAddrsig(false), + EmitSwitchBitmap(false) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging /// output from the code generator. unsigned PrintMachineCode : 1; - + /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool DisableFramePointerElim(const MachineFunction &MF) const; @@ -252,6 +253,9 @@ /// Emit address-significance table. unsigned EmitAddrsig : 1; + /// Try to emit a bitmap instead of a switch lookup table. + unsigned EmitSwitchBitmap : 1; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. Soft selects the ABI for Index: include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- include/llvm/Transforms/IPO/PassManagerBuilder.h +++ include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -166,6 +166,9 @@ /// Path of the sample Profile data file. std::string PGOSampleUse; + /// Don't convert a switch lookup table into a bitmap. + bool NoSwitchBitmap = false; + private: /// ExtensionList - This is list of all of the extensions that are registered. 
std::vector> Extensions; Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -250,9 +250,10 @@ // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, // simplify terminator instructions, convert switches to lookup tables, etc. // -FunctionPass *createCFGSimplificationPass( - unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false, +FunctionPass *createCFGSimplificationPass(unsigned Threshold = 1, + bool ForwardSwitchCond = false, bool ConvertSwitch = false, + bool KeepLoops = true, bool SinkCommon = false, + bool noSwitchBitmap = false, std::function Ftor = nullptr); //===----------------------------------------------------------------------===// Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -67,6 +67,7 @@ bool ConvertSwitchToLookupTable; bool NeedCanonicalLoop; bool SinkCommonInsts; + bool NoSwitchBitmap = false; AssumptionCache *AC; SimplifyCFGOptions(unsigned BonusThreshold = 1, Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -405,7 +405,8 @@ // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(1, true, true, false, true)); + addPass(createCFGSimplificationPass(1, true, true, false, true, + !TM->Options.EmitSwitchBitmap)); // Run LoopDataPrefetch // Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -385,7 +385,7 @@ // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - 1, false, false, true, true, [this](const Function &F) { + 1, false, false, true, true, false, [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -681,7 +681,8 @@ // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + MPM.add(createCFGSimplificationPass(1, true, true, false, true, + NoSwitchBitmap)); if (RunSLPAfterLoopVectorization && SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 
Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -64,7 +64,6 @@ "sink-common-insts", cl::Hidden, cl::init(false), cl::desc("Sink common instructions (default = false)")); - STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, @@ -234,6 +233,7 @@ CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false, + bool noSwitchBitmap = false, std::function Ftor = nullptr) : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -258,6 +258,8 @@ Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() ? UserSinkCommonInsts : SinkCommon; + + Options.NoSwitchBitmap = noSwitchBitmap; } bool runOnFunction(Function &F) override { @@ -288,8 +290,9 @@ FunctionPass * llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, bool ConvertSwitch, bool KeepLoops, - bool SinkCommon, - std::function Ftor) { + bool SinkCommon, bool noSwitchBitmap, + std::function Ftor) { return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, - KeepLoops, SinkCommon, std::move(Ftor)); + KeepLoops, SinkCommon, noSwitchBitmap, + std::move(Ftor)); } Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -4875,7 +4875,8 @@ SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName, + const SimplifyCFGOptions &Options); /// Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup 
table. @@ -4929,13 +4930,14 @@ SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName, + const SimplifyCFGOptions &Options) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); - + // If all values in the table are equal, this is that value. SingleValue = Values.begin()->second; - + Type *ValueType = Values.begin()->second->getType(); // Build up the table contents. @@ -5010,25 +5012,28 @@ } } - // If the type is integer and the table fits in a register, build a bitmap. - if (WouldFitInRegister(DL, TableSize, ValueType)) { - IntegerType *IT = cast(ValueType); - APInt TableInt(TableSize * IT->getBitWidth(), 0); - for (uint64_t I = TableSize; I > 0; --I) { - TableInt <<= IT->getBitWidth(); - // Insert values into the bitmap. Undef values are set to zero. - if (!isa(TableContents[I - 1])) { - ConstantInt *Val = cast(TableContents[I - 1]); - TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + // When Options.NoSwitchBitmap is set, skip building a bitmap in place of + // the switch table. + if (!Options.NoSwitchBitmap) { + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(DL, TableSize, ValueType)) { + IntegerType *IT = cast(ValueType); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. 
+ if (!isa(TableContents[I - 1])) { + ConstantInt *Val = cast(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; } - BitMap = ConstantInt::get(M.getContext(), TableInt); - BitMapElementTy = IT; - Kind = BitMapKind; - ++NumBitMaps; - return; } - // Store the table in an array. ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); @@ -5244,7 +5249,8 @@ /// lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); Function *Fn = SI->getParent()->getParent(); @@ -5437,7 +5443,7 @@ Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; StringRef FuncName = Fn->getName(); SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, - FuncName); + FuncName, Options); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -5628,7 +5634,7 @@ // CVP. Therefore, only apply this transformation during late stages of the // optimisation pipeline. 
if (Options.ConvertSwitchToLookupTable && - SwitchToLookupTable(SI, Builder, DL, TTI)) + SwitchToLookupTable(SI, Builder, DL, TTI, Options)) return requestResimplify(); if (ReduceSwitchRange(SI, Builder, DL, TTI)) Index: test/Transforms/Util/no_switch_bitmap.ll =================================================================== --- /dev/null +++ test/Transforms/Util/no_switch_bitmap.ll @@ -0,0 +1,38 @@ +;RUN: llc %s -o - -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-arm-none-eabi" + +@switch.table.test = private unnamed_addr constant [3 x i16] [i16 500, i16 200, i16 17000], align 4 + +; Function Attrs: minsize norecurse nounwind optsize readnone +;CHECK-LABEL: @test +;CHECK: cmp +;CHECK: b +;CHECK: adrp +;CHECK: add +;CHECK: ldrh +;CHECK: ret +;CHECK: mov +;CHECK: ret +define dso_local i16 @test(i32) local_unnamed_addr #0 { + %2 = icmp ult i32 %0, 3 + br i1 %2, label %3, label %7 + +;