diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst --- a/llvm/docs/Passes.rst +++ b/llvm/docs/Passes.rst @@ -973,6 +973,11 @@ at 2), which effectively gives values in deep loops higher rank than values not in loops. +``-rel-lookup-table-converter``: Relative lookup table converter +---------------------------------------------------------------- + +This pass converts lookup tables to PIC-friendly relative lookup tables. + ``-reg2mem``: Demote all values to stack slots ---------------------------------------------- diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -720,6 +720,9 @@ /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + /// Return true if lookup tables should be turned into relative lookup tables. + bool shouldBuildRelLookupTables() const; + /// Return true if the input function which is cold at all call sites, /// should use coldcc calling convention. 
bool useColdCCForColdCall(Function &F) const; @@ -1481,6 +1484,7 @@ virtual unsigned getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual bool shouldBuildRelLookupTables() = 0; virtual bool useColdCCForColdCall(Function &F) = 0; virtual unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, @@ -1867,6 +1871,9 @@ bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + bool shouldBuildRelLookupTables() override { + return Impl.shouldBuildRelLookupTables(); + } bool useColdCCForColdCall(Function &F) override { return Impl.useColdCCForColdCall(F); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -292,8 +292,11 @@ unsigned getRegUsageForType(Type *Ty) const { return 1; } bool shouldBuildLookupTables() const { return true; } + bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } + bool shouldBuildRelLookupTables() const { return true; } + bool useColdCCForColdCall(Function &F) const { return false; } unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -45,6 +45,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -379,6 +380,25 @@ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } + bool shouldBuildRelLookupTables() { + const TargetMachine &TM = getTLI()->getTargetMachine(); + // If non-PIC mode, do not generate a 
relative lookup table. + if (!TM.isPositionIndependent()) + return false; + + if (!TM.getTargetTriple().isArch64Bit()) + return false; + + /// Relative lookup table entries consist of 32-bit offsets. + /// Do not generate relative lookup tables for large code models + /// in 64-bit architectures where 32-bit offsets might not be enough. + if (TM.getCodeModel() == CodeModel::Medium || + TM.getCodeModel() == CodeModel::Large) + return false; + + return true; + } + bool haveFastSqrt(Type *Ty) { const TargetLoweringBase *TLI = getTLI(); EVT VT = TLI->getValueType(DL, Ty); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -318,6 +318,7 @@ void initializeMustExecutePrinterPass(PassRegistry&); void initializeMustBeExecutedContextPrinterPass(PassRegistry&); void initializeNameAnonGlobalLegacyPassPass(PassRegistry&); +void initializeRelLookupTableConverterLegacyPassPass(PassRegistry &); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); void initializeObjCARCAAWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -517,6 +517,7 @@ ///===---------------------------------------------------------------------===// ModulePass *createNameAnonGlobalPass(); +ModulePass *createRelLookupTableConverterPass(); ModulePass *createCanonicalizeAliasesPass(); //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h b/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h @@ -0,0 +1,70 @@ +//===-- RelLookupTableConverterPass.h - Rel Table Conv 
----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements relative lookup table converter that converts +/// lookup tables to relative lookup tables to make them PIC-friendly. +/// +/// Switch lookup table example: +/// @switch.table.foo = private unnamed_addr constant [3 x i8*] +/// [ +/// i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), +/// i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), +/// i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) +/// ], align 8 +/// +/// switch.lookup: +/// %1 = sext i32 %cond to i64 +/// %switch.gep = getelementptr inbounds [3 x i8*], +/// [3 x i8*]* @switch.table.foo, i64 0, i64 %1 +/// %switch.load = load i8*, i8** %switch.gep, align 8 +/// ret i8* %switch.load +/// +/// Switch lookup table will become a relative lookup table that +/// consists of relative offsets. 
+/// +/// @reltable.foo = private unnamed_addr constant [3 x i32] +/// [ +/// i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), +/// i64 ptrtoint ([3 x i32]* @reltable.foo to i64)) to i32), +/// i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.1 to i64), +/// i64 ptrtoint ([3 x i32]* @reltable.foo to i64)) to i32), +/// i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), +/// i64 ptrtoint ([3 x i32]* @reltable.foo to i64)) to i32) +/// ], align 4 +/// +/// IR after converting to a relative lookup table: +/// switch.lookup: +/// %1 = sext i32 %cond to i64 +/// %reltable.shift = shl i64 %1, 2 +/// %reltable.intrinsic = call i8* @llvm.load.relative.i64( +/// i8* bitcast ([3 x i32]* @reltable.foo to i8*), +/// i64 %reltable.shift) +/// ret i8* %reltable.intrinsic +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H +#define LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +// Pass that converts lookup tables to relative lookup tables. 
+class RelLookupTableConverterPass + : public PassInfoMixin { +public: + RelLookupTableConverterPass() = default; + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -456,11 +456,16 @@ bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } + bool TargetTransformInfo::shouldBuildLookupTablesForConstant( Constant *C) const { return TTIImpl->shouldBuildLookupTablesForConstant(C); } +bool TargetTransformInfo::shouldBuildRelLookupTables() const { + return TTIImpl->shouldBuildRelLookupTables(); +} + bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { return TTIImpl->useColdCCForColdCall(F); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -227,6 +227,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -1408,6 +1409,8 @@ MPM.addPass(GlobalDCEPass()); MPM.addPass(ConstantMergePass()); + MPM.addPass(RelLookupTableConverterPass()); + return MPM; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -63,8 +63,8 @@ MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) MODULE_PASS("inliner-wrapper-no-mandatory-first", 
ModuleInlinerWrapperPass( - getInlineParams(), - DebugLogging, + getInlineParams(), + DebugLogging, false)) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) @@ -93,6 +93,7 @@ MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs())) MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs())) MODULE_PASS("print", ModuleDebugInfoPrinterPass(dbgs())) +MODULE_PASS("rel-lookup-table-converter", RelLookupTableConverterPass()) MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass()) @@ -281,7 +282,7 @@ FUNCTION_PASS("print", DominanceFrontierPrinterPass(dbgs())) FUNCTION_PASS("print", FunctionPropertiesPrinterPass(dbgs())) FUNCTION_PASS("print", InlineCostAnnotationPrinterPass(dbgs())) -FUNCTION_PASS("print", +FUNCTION_PASS("print", InlineSizeEstimatorAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) FUNCTION_PASS("print", MemorySSAPrinterPass(dbgs())) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -909,6 +909,8 @@ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
MPM.add(createCFGSimplificationPass()); + MPM.add(createRelLookupTableConverterPass()); + addExtensionsToPM(EP_OptimizerLast, MPM); if (PrepareForLTO) { diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -54,6 +54,7 @@ NameAnonGlobals.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp + RelLookupTableConverter.cpp ScalarEvolutionExpander.cpp StripGCRelocates.cpp SSAUpdater.cpp diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -0,0 +1,252 @@ +//===- RelLookupTableConverterPass - Rel Table Conv -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements relative lookup table converter that converts +// lookup tables to relative lookup tables to make them PIC-friendly. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/RelLookupTableConverter.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { + if (!GV.hasInitializer()) + return false; + + // If lookup table has more than one user, + // do not generate a relative lookup table. + // This is to simplify the analysis that needs to be done for this pass. + // TODO: Add support for lookup tables with multiple uses. + // For ex, this can happen when a function that uses a lookup table gets + // inlined into multiple call sites. + if (!GV.hasOneUse()) + return false; + + GetElementPtrInst *GEP = + dyn_cast(GV.use_begin()->getUser()); + if (!GEP || !GEP->hasOneUse()) + return false; + + if (!isa(GEP->use_begin()->getUser())) + return false; + + // If the original lookup table is not dso_local, + // do not generate a relative lookup table. + // This optimization creates a relative lookup table that consists of + // offsets between the start of the lookup table and its elements. + // To be able to generate these offsets, relative lookup table + // and its elements should be dso_local, which means that they should + // resolve to symbols within the same linkage unit. + if (!(GV.isDSOLocal() || GV.isImplicitDSOLocal())) + return false; + + ConstantArray *Array = dyn_cast(GV.getInitializer()); + // If values are not pointers, do not generate a relative lookup table. 
+ if (!Array || !Array->getType()->getElementType()->isPointerTy()) + return false; + + const DataLayout &DL = M.getDataLayout(); + for (const Use &Op : Array->operands()) { + Constant *ConstOp = cast(&Op); + GlobalValue *GVOp; + APInt Offset; + + // If an operand is not a constant offset from a lookup table, + // do not generate a relative lookup table. + if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL)) + return false; + + // If an operand in the lookup table is not dso_local, + // do not generate a relative lookup table. + if (!(GVOp->isDSOLocal() || GVOp->isImplicitDSOLocal())) + return false; + } + + return true; +} + +static GlobalVariable *createRelLookupTable(Function &Func, + GlobalVariable &LookupTable) { + Module &M = *Func.getParent(); + ConstantArray *LookupTableArr = + cast(LookupTable.getInitializer()); + unsigned NumElts = LookupTableArr->getType()->getNumElements(); + ArrayType *IntArrayTy = + ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts); + GlobalVariable *RelLookupTable = new GlobalVariable( + M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(), + nullptr, "reltable." 
+ Func.getName()); + RelLookupTable->copyAttributesFrom(&LookupTable); + + uint64_t Idx = 0; + SmallVector RelLookupTableContents(NumElts); + + for (Use &Operand : LookupTableArr->operands()) { + Constant *Element = cast(Operand); + Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); + Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); + Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); + Constant *Sub = llvm::ConstantExpr::getSub(Target, Base); + Constant *RelOffset = + llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(M.getContext())); + RelLookupTableContents[Idx++] = RelOffset; + } + + Constant *Initializer = + ConstantArray::get(IntArrayTy, RelLookupTableContents); + RelLookupTable->setInitializer(Initializer); + RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + RelLookupTable->setAlignment(llvm::Align(4)); + return RelLookupTable; +} + +static void convertToRelLookupTable(GlobalVariable &LookupTable) { + GetElementPtrInst *GEP = + cast(LookupTable.use_begin()->getUser()); + LoadInst *Load = cast(GEP->use_begin()->getUser()); + + Module &M = *LookupTable.getParent(); + BasicBlock *BB = GEP->getParent(); + IRBuilder<> Builder(BB); + Function &Func = *BB->getParent(); + + // Generate an array that consists of relative offsets. + GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable); + + // Place new instruction sequence before GEP. 
+ Builder.SetInsertPoint(GEP); + Value *Index = GEP->getOperand(2); + IntegerType *IntTy = cast(Index->getType()); + Value *Offset = + Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift"); + + Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration( + &M, Intrinsic::load_relative, {Index->getType()}); + Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy()); + + // Create a call to load.relative intrinsic that computes the target address + // by adding base address (lookup table address) and relative offset. + Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset}, + "reltable.intrinsic"); + + // Create a bitcast instruction if necessary. + if (Load->getType() != Builder.getInt8PtrTy()) + Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast"); + + // Replace load instruction with the new generated instruction sequence. + BasicBlock::iterator InsertPoint(Load); + ReplaceInstWithValue(Load->getParent()->getInstList(), InsertPoint, Result); + + // Remove GEP instruction. + GEP->eraseFromParent(); +} + +// Convert lookup tables to relative lookup tables in the module. +static bool convertToRelativeLookupTables( + Module &M, function_ref GetTTI) { + Module::iterator FI = M.begin(); + if (FI == M.end()) + return false; + + // Check if we have a target that supports relative lookup tables. + if (!GetTTI(*FI).shouldBuildRelLookupTables()) + return false; + + bool Changed = false; + + for (auto GVI = M.global_begin(), E = M.global_end(); GVI != E;) { + GlobalVariable &GlobalVar = *GVI++; + + if (!shouldConvertToRelLookupTable(M, GlobalVar)) + continue; + + convertToRelLookupTable(GlobalVar); + + // Remove the original lookup table. 
+ GlobalVar.eraseFromParent(); + Changed = true; + } + + return Changed; +} + +PreservedAnalyses RelLookupTableConverterPass::run(Module &M, + ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + + auto GetTTI = [&](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + + if (!convertToRelativeLookupTables(M, GetTTI)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet(); + return PA; +} + +namespace { + +/// Pass that converts lookup tables to relative lookup tables. +class RelLookupTableConverterLegacyPass : public ModulePass { + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + StringRef getPassName() const override { + return "Relative Lookup Table Converter"; + } + + RelLookupTableConverterLegacyPass() : ModulePass(ID) { + initializeRelLookupTableConverterLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis().getTTI(F); + }; + return convertToRelativeLookupTables(M, GetTTI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } +}; + +} // anonymous namespace + +char RelLookupTableConverterLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(RelLookupTableConverterLegacyPass, + "rel-lookup-table-converter", + "Convert to relative lookup tables", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(RelLookupTableConverterLegacyPass, + "rel-lookup-table-converter", + "Convert to relative lookup tables", false, false) + +namespace llvm { +ModulePass *createRelLookupTableConverterPass() { + return new RelLookupTableConverterLegacyPass(); +} +} // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ 
b/llvm/lib/Transforms/Utils/Utils.cpp @@ -37,6 +37,7 @@ initializeLowerSwitchLegacyPassPass(Registry); initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); + initializeRelLookupTableConverterLegacyPassPass(Registry); initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesLegacyPassPass(Registry); initializeMetaRenamerPass(Registry); diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -306,6 +306,8 @@ ; GCN-O1-NEXT: Remove redundant instructions ; GCN-O1-NEXT: Hoist/decompose integer division and remainder ; GCN-O1-NEXT: Simplify the CFG +; GCN-O1-NEXT: Relative Lookup Table Converter +; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Annotation Remarks ; GCN-O1-NEXT: Pass Arguments: @@ -660,6 +662,8 @@ ; GCN-O2-NEXT: Remove redundant instructions ; GCN-O2-NEXT: Hoist/decompose integer division and remainder ; GCN-O2-NEXT: Simplify the CFG +; GCN-O2-NEXT: Relative Lookup Table Converter +; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Annotation Remarks ; GCN-O2-NEXT: Pass Arguments: @@ -1019,6 +1023,8 @@ ; GCN-O3-NEXT: Remove redundant instructions ; GCN-O3-NEXT: Hoist/decompose integer division and remainder ; GCN-O3-NEXT: Simplify the CFG +; GCN-O3-NEXT: Relative Lookup Table Converter +; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Annotation Remarks ; GCN-O3-NEXT: Pass Arguments: diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -113,9 +113,9 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running 
analysis: LazyCallGraphAnalysis -; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. @@ -256,6 +256,8 @@ ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass +; CHECK-O-NEXT: Running pass: RelLookupTableConverterPass +; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-LTO-NEXT: Running pass: CanonicalizeAliasesPass ; CHECK-LTO-NEXT: Running pass: NameAnonGlobalPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -98,9 +98,9 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis -; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. 
@@ -243,6 +243,8 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: CGProfilePass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass +; CHECK-POSTLINK-O-NEXT: Running pass: RelLookupTableConverterPass +; CHECK-POSTLINK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-PRELINK-O-NEXT: Running pass: CanonicalizeAliasesPass ; CHECK-PRELINK-O-NEXT: Running pass: NameAnonGlobalPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -68,10 +68,10 @@ ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-O-NEXT: Running analysis: GlobalsAA -; CHECK-O-NEXT: Running analysis: CallGraphAnalysis -; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA +; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running analysis: CallGraphAnalysis +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy @@ -212,6 +212,8 @@ ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass +; CHECK-O-NEXT: Running pass: RelLookupTableConverterPass +; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-O-NEXT: Running pass: PrintModulePass diff --git 
a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -78,9 +78,9 @@ ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-O-NEXT: Running analysis: GlobalsAA -; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA +; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy @@ -224,6 +224,8 @@ ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass +; CHECK-O-NEXT: Running pass: RelLookupTableConverterPass +; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-O-NEXT: Running pass: PrintModulePass diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -307,6 +307,8 @@ ; CHECK-NEXT: Remove redundant instructions ; CHECK-NEXT: Hoist/decompose integer division and remainder ; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Relative Lookup Table Converter +; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Annotation Remarks ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Bitcode Writer diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll 
--- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -319,6 +319,8 @@ ; CHECK-NEXT: Remove redundant instructions ; CHECK-NEXT: Hoist/decompose integer division and remainder ; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Relative Lookup Table Converter +; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Annotation Remarks ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Bitcode Writer diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -312,6 +312,8 @@ ; CHECK-NEXT: Remove redundant instructions ; CHECK-NEXT: Hoist/decompose integer division and remainder ; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Relative Lookup Table Converter +; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Annotation Remarks ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Bitcode Writer diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -293,6 +293,8 @@ ; CHECK-NEXT: Remove redundant instructions ; CHECK-NEXT: Hoist/decompose integer division and remainder ; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Relative Lookup Table Converter +; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Annotation Remarks ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Bitcode Writer diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -106,6 +106,8 @@ ; CHECK-O2: Loop Pass Manager ; CHECK-O2-NEXT: Loop Sink ; CHECK-O2: Simplify the CFG +; CHECK-O2: Relative Lookup Table Converter +; CHECK-O2: FunctionPass Manager ; CHECK-O2-NOT: Manager ; ; FIXME: There really shouldn't be another pass manager, especially one that diff --git a/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll 
b/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/RelLookupTableConverter/X86/no_relative_lookup_table.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -rel-lookup-table-converter -mtriple=x86_64-linux -S | FileCheck %s +; RUN: opt < %s -rel-lookup-table-converter -mtriple=i386-unknown-unknown -relocation-model=pic -S | FileCheck %s +; RUN: opt < %s -rel-lookup-table-converter -mtriple=x86_64-linux -relocation-model=pic -code-model=medium -S | FileCheck %s +; RUN: opt < %s -rel-lookup-table-converter -mtriple=x86_64-linux -relocation-model=pic -code-model=large -S | FileCheck %s + +; RUN: opt < %s -passes=rel-lookup-table-converter -mtriple=x86_64-linux -S | FileCheck %s +; RUN: opt < %s -passes=rel-lookup-table-converter -mtriple=i386-unknown-unknown -relocation-model=pic -S | FileCheck %s +; RUN: opt < %s -passes=rel-lookup-table-converter -mtriple=x86_64-linux -relocation-model=pic -code-model=medium -S | FileCheck %s +; RUN: opt < %s -passes=rel-lookup-table-converter -mtriple=x86_64-linux -relocation-model=pic -code-model=large -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@.str = private unnamed_addr constant [5 x i8] c"zero\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"one\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"two\00", align 1 +@.str.3 = private unnamed_addr constant [8 x i8] c"default\00", align 1 + +@switch.table.string_table = private unnamed_addr constant [3 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) + ], align 8 + +; Switch lookup 
table +; CHECK: @switch.table.string_table = private unnamed_addr constant [3 x i8*] +; CHECK-SAME: [ +; CHECK-SAME: i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), +; CHECK-SAME: i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), +; CHECK-SAME: i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) +; CHECK-SAME: ], align 8 + +; Switch lookup table for strings; it must stay a regular (non-relative) lookup table +define i8* @string_table(i32 %cond) { + ; CHECK-LABEL: @string_table( + ; CHECK-NEXT: entry: + ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 + ; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] + ; CHECK: switch.lookup: + ; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.string_table, i32 0, i32 [[COND]] + ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i8*, i8** [[SWITCH_GEP]], align 8 + ; CHECK-NEXT: ret i8* [[SWITCH_LOAD]] + ; CHECK: return: + ; CHECK-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) + +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.string_table, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + ret i8* %switch.load + +return: ; preds = %entry + ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +} diff --git a/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll b/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/RelLookupTableConverter/X86/relative_lookup_table.ll @@ -0,0 +1,310 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -rel-lookup-table-converter -relocation-model=pic -S | FileCheck %s +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -S | 
FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"zero\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"one\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"two\00", align 1 +@.str.3 = private unnamed_addr constant [8 x i8] c"default\00", align 1 +@.str.4 = private unnamed_addr constant [6 x i8] c"three\00", align 1 +@.str.5 = private unnamed_addr constant [5 x i8] c"str1\00", align 1 +@.str.6 = private unnamed_addr constant [5 x i8] c"str2\00", align 1 +@.str.7 = private unnamed_addr constant [12 x i8] c"singlevalue\00", align 1 + +@a1 = external global i32, align 4 +@b1 = external global i32, align 4 +@c1 = external global i32, align 4 +@d1 = external global i32, align 4 + +@a2 = internal global i32 0, align 4 +@b2 = internal global i32 0, align 4 +@c2 = internal global i32 0, align 4 +@d2 = internal global i32 0, align 4 + +@hidden0 = external hidden global i32, align 8 +@hidden1 = external hidden global i32, align 8 +@hidden2 = external hidden global i32, align 8 +@hidden3 = external hidden global i32, align 8 + +@switch.table.no_dso_local = private unnamed_addr constant [3 x i32*] [i32* @a1, i32* @b1, i32* @c1], align 8 + +@switch.table.dso_local = private unnamed_addr constant [3 x i32*] [i32* @a2, i32* @b2, i32* @c2], align 8 + +@switch.table.hidden = private unnamed_addr constant [3 x i32*] [i32* @hidden0, i32* @hidden1, i32* @hidden2], align 8 + +@switch.table.string_table = private unnamed_addr constant [3 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) + ], align 8 + +@switch.table.string_table_holes = private 
unnamed_addr constant [4 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), + i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.4, i64 0, i64 0) + ], align 8 + +@switch.table.single_value = private unnamed_addr constant [3 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0) + ], align 8 + +@user_defined_lookup_table.table = internal unnamed_addr constant [3 x i8*] + [ + i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i32 0, i32 0), + i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i32 0, i32 0) + ], align 16 + +; Lookup table for non dso-local integer pointers +; CHECK: @switch.table.no_dso_local = private unnamed_addr constant [3 x i32*] [i32* @a1, i32* @b1, i32* @c1], align + +; Relative switch lookup table for dso-local integer pointers +; CHECK: @reltable.dso_local = private unnamed_addr constant [3 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (i32* @a2 to i64), i64 ptrtoint ([3 x i32]* @reltable.dso_local to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (i32* @b2 to i64), i64 ptrtoint ([3 x i32]* @reltable.dso_local to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (i32* @c2 to i64), i64 ptrtoint ([3 x i32]* @reltable.dso_local to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Relative switch lookup table for integer pointers with hidden visibility +; CHECK: @reltable.hidden = private unnamed_addr constant [3 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (i32* @hidden0 to i64), i64 ptrtoint ([3 x i32]* @reltable.hidden to i64)) to 
i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (i32* @hidden1 to i64), i64 ptrtoint ([3 x i32]* @reltable.hidden to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (i32* @hidden2 to i64), i64 ptrtoint ([3 x i32]* @reltable.hidden to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Relative switch lookup table for strings +; CHECK: @reltable.string_table = private unnamed_addr constant [3 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.1 to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([3 x i32]* @reltable.string_table to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Relative switch lookup table for strings with holes, where holes are filled with relative offset to default values +; CHECK: @reltable.string_table_holes = private unnamed_addr constant [4 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([8 x i8]* @.str.3 to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([6 x i8]* @.str.4 to i64), i64 ptrtoint ([4 x i32]* @reltable.string_table_holes to i64)) to i32) +; CHECK-SAME: ], align 4 + +; Single value check +; CHECK: @reltable.single_value = private unnamed_addr constant [3 x i32] +; CHECK-SAME: [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([5 x i8]* @.str to i64), i64 ptrtoint ([3 x i32]* @reltable.single_value to i64)) to i32), +; CHECK-SAME: i32 trunc 
(i64 sub (i64 ptrtoint ([4 x i8]* @.str.1 to i64), i64 ptrtoint ([3 x i32]* @reltable.single_value to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint ([4 x i8]* @.str.2 to i64), i64 ptrtoint ([3 x i32]* @reltable.single_value to i64)) to i32) +; CHECK-SAME: ], align 4 +; + +; Lookup table check for non dso-local integer pointers +define i32* @no_dso_local(i32 %cond) { +; CHECK-LABEL: @no_dso_local( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.no_dso_local, i32 0, i32 [[COND]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]], align 8 +; CHECK-NEXT: ret i32* [[SWITCH_LOAD]] +; CHECK: return: +; CHECK-NEXT: ret i32* @d1 +; +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.no_dso_local, i32 0, i32 %cond + %switch.load = load i32*, i32** %switch.gep, align 8 + ret i32* %switch.load + +return: ; preds = %entry + ret i32* @d1 +} + +; Relative switch lookup table for dso-local integer pointers +define i32* @dso_local(i32 %cond) { +; CHECK-LABEL: @dso_local( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.dso_local to i8*), i32 [[RELTABLE_SHIFT]]) +; CHECK-NEXT: [[BIT_CAST:%.*]] = bitcast i8* [[RELTABLE_INTRINSIC]] to i32* +; CHECK-NEXT: ret i32* [[BIT_CAST]] +; CHECK: return: +; CHECK-NEXT: ret i32* @d2 +; +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, 
label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.dso_local, i32 0, i32 %cond + %switch.load = load i32*, i32** %switch.gep, align 8 + ret i32* %switch.load + +return: ; preds = %entry + ret i32* @d2 +} + +; Relative switch lookup table for integer pointers with hidden visibility +define i32* @hidden(i32 %cond) { +; CHECK-LABEL: @hidden( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.hidden to i8*), i32 [[RELTABLE_SHIFT]]) +; CHECK-NEXT: [[BIT_CAST:%.*]] = bitcast i8* [[RELTABLE_INTRINSIC]] to i32* +; CHECK-NEXT: ret i32* [[BIT_CAST]] +; CHECK: return: +; CHECK-NEXT: ret i32* @d2 +; +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.hidden, i32 0, i32 %cond + %switch.load = load i32*, i32** %switch.gep, align 8 + ret i32* %switch.load + +return: ; preds = %entry + ret i32* @d2 +} + +; Relative switch lookup table for strings +define i8* @string_table(i32 %cond) { + ; CHECK-LABEL: @string_table( + ; CHECK-NEXT: entry: + ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 + ; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] + ; CHECK: switch.lookup: + ; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 + ; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.string_table to i8*), i32 [[RELTABLE_SHIFT]]) + ; CHECK-NEXT: ret i8* [[RELTABLE_INTRINSIC]] + ; CHECK: return: + ; CHECK-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x 
i8]* @.str.3, i64 0, i64 0) + ; +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.string_table, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + ret i8* %switch.load + +return: ; preds = %entry + ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +} + +; Relative switch lookup table for strings with holes, where holes are filled with relative offset to default values +define i8* @string_table_holes(i32 %cond) { +; CHECK-LABEL: @string_table_holes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 4 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([4 x i32]* @reltable.string_table_holes to i8*), i32 [[RELTABLE_SHIFT]]) +; CHECK-NEXT: ret i8* [[RELTABLE_INTRINSIC]] +; CHECK: return: +; CHECK-NEXT: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +; +entry: + %0 = icmp ult i32 %cond, 4 + br i1 %0, label %switch.lookup, label %return + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.string_table_holes, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + ret i8* %switch.load + +return: ; preds = %entry + ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0) +} + + +; Single value check +; If there is a lookup table, where each element contains the same value, +; a relative lookup should not be generated +define void @single_value(i32 %cond) { +; CHECK-LABEL: @single_value( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label 
[[RETURN:%.*]] +; CHECK: switch.lookup: +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i32 [[COND]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i32(i8* bitcast ([3 x i32]* @reltable.single_value to i8*), i32 [[RELTABLE_SHIFT]]) +; CHECK: sw.epilog: +; CHECK-NEXT: [[STR1:%.*]] = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i64 0, i64 0), %entry ], [ getelementptr inbounds ([12 x i8], [12 x i8]* @.str.7, i64 0, i64 0), %switch.lookup ] +; CHECK-NEXT: [[STR2:%.*]] = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i64 0, i64 0), %entry ], [ [[RELTABLE_INTRINSIC]], [[SWITCH_LOOKUP]] ] +; CHECK-NEXT: ret void + +entry: + %0 = icmp ult i32 %cond, 3 + br i1 %0, label %switch.lookup, label %sw.epilog + +switch.lookup: ; preds = %entry + %switch.gep = getelementptr inbounds [3 x i8*], [3 x i8*]* @switch.table.single_value, i32 0, i32 %cond + %switch.load = load i8*, i8** %switch.gep, align 8 + br label %sw.epilog + +sw.epilog: ; preds = %switch.lookup, %entry + %str1.0 = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i64 0, i64 0), %entry ], [ getelementptr inbounds ([12 x i8], [12 x i8]* @.str.7, i64 0, i64 0), %switch.lookup ] + %str2.0 = phi i8* [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i64 0, i64 0), %entry ], [ %switch.load, %switch.lookup ] + ret void +} + +; Relative lookup table generated for a user-defined lookup table +define i8* @user_defined_lookup_table(i32 %cond) { +; CHECK-LABEL: @user_defined_lookup_table( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[COND:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[IDX_PROM:%.*]] = sext i32 [[COND]] to i64 +; CHECK-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[IDX_PROM]], 2 +; CHECK-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call i8* @llvm.load.relative.i64(i8* bitcast ([3 x i32]* @reltable.user_defined_lookup_table to i8*), i64 
[[RELTABLE_SHIFT]]) +; CHECK-NEXT: br label %cond.end +; CHECK: cond.end: +; CHECK-NEXT: [[COND1:%.*]] = phi i8* [ [[RELTABLE_INTRINSIC]], %cond.false ], [ getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0), %entry ] +; CHECK-NEXT: ret i8* [[COND1]] +; +entry: + %cmp = icmp sgt i32 %cond, 3 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %idxprom = sext i32 %cond to i64 + %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @user_defined_lookup_table.table, i64 0, i64 %idxprom + %0 = load i8*, i8** %arrayidx, align 8, !tbaa !4 + br label %cond.end + +cond.end: ; preds = %entry, %cond.false + %cond1 = phi i8* [ %0, %cond.false ], [ getelementptr inbounds ([8 x i8], [8 x i8]* @.str.3, i64 0, i64 0), %entry ] + ret i8* %cond1 +} + +!llvm.module.flags = !{!0, !1} +!0 = !{i32 7, !"PIC Level", i32 2} +!1 = !{i32 1, !"Code Model", i32 1} +!4 = !{!"any pointer", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn @@ -61,6 +61,7 @@ "NameAnonGlobals.cpp", "PredicateInfo.cpp", "PromoteMemoryToRegister.cpp", + "RelLookupTableConverter.cpp", "SSAUpdater.cpp", "SSAUpdaterBulk.cpp", "SampleProfileLoaderBaseUtil.cpp",