Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
@@ -381,6 +381,10 @@
   /// target.
   bool shouldBuildLookupTables() const;
 
+  /// \brief Return true if switches should be turned into lookup tables
+  /// containing this constant value for the target.
+  bool shouldBuildLookupTablesForConstant(Constant *C) const;
+
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
@@ -703,6 +707,7 @@
   virtual unsigned getJumpBufAlignment() = 0;
   virtual unsigned getJumpBufSize() = 0;
   virtual bool shouldBuildLookupTables() = 0;
+  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -888,6 +893,9 @@
   bool shouldBuildLookupTables() override {
     return Impl.shouldBuildLookupTables();
   }
+  bool shouldBuildLookupTablesForConstant(Constant *C) override {
+    return Impl.shouldBuildLookupTablesForConstant(C);
+  }
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -248,6 +248,7 @@
   unsigned getJumpBufSize() { return 0; }
 
   bool shouldBuildLookupTables() { return true; }
+  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
 
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
 
Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
@@ -178,6 +178,9 @@
 bool TargetTransformInfo::shouldBuildLookupTables() const {
   return TTIImpl->shouldBuildLookupTables();
 }
+bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
+  return TTIImpl->shouldBuildLookupTablesForConstant(C);
+}
 
 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -128,6 +128,16 @@
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace);
+
+  bool shouldBuildLookupTablesForConstant(Constant *C) const {
+    // In the ROPI and RWPI relocation models we can't have pointers to global
+    // variables or functions in constant data, so don't convert switches to
+    // lookup tables if any of the values would need relocation.
+    if (ST->isROPI() || ST->isRWPI())
+      return !C->needsRelocation();
+
+    return true;
+  }
   /// @}
 };
 
Index: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4425,18 +4425,25 @@
 
 /// Return true if the backend will be able to handle
 /// initializing an array of constants like C.
-static bool ValidLookupTableConstant(Constant *C) {
+static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
   if (C->isThreadDependent())
     return false;
   if (C->isDLLImportDependent())
     return false;
 
+  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
+      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
+      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
+    return false;
+
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    return CE->isGEPWithNoNotionalOverIndexing();
+    if (!CE->isGEPWithNoNotionalOverIndexing())
+      return false;
 
-  return isa<ConstantFP>(C) || isa<ConstantInt>(C) ||
-         isa<ConstantPointerNull>(C) || isa<GlobalValue>(C) ||
-         isa<UndefValue>(C);
+  if (!TTI.shouldBuildLookupTablesForConstant(C))
+    return false;
+
+  return true;
 }
 
 /// If V is a Constant, return it. Otherwise, try to look up
@@ -4490,8 +4497,8 @@
 static bool
 GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
                BasicBlock **CommonDest,
-               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
-               const DataLayout &DL) {
+               SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
+               const DataLayout &DL, const TargetTransformInfo &TTI) {
   // The block from which we enter the common destination.
   BasicBlock *Pred = SI->getParent();
 
@@ -4553,7 +4560,7 @@
       return false;
 
     // Be conservative about which kinds of constants we support.
-    if (!ValidLookupTableConstant(ConstVal))
+    if (!ValidLookupTableConstant(ConstVal, TTI))
       return false;
 
     Res.push_back(std::make_pair(PHI, ConstVal));
@@ -4585,14 +4592,15 @@
                                   BasicBlock *&CommonDest,
                                   SwitchCaseResultVectorTy &UniqueResults,
                                   Constant *&DefaultResult,
-                                  const DataLayout &DL) {
+                                  const DataLayout &DL,
+                                  const TargetTransformInfo &TTI) {
   for (auto &I : SI->cases()) {
     ConstantInt *CaseVal = I.getCaseValue();
 
     // Resulting value at phi nodes for this case value.
     SwitchCaseResultsTy Results;
     if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
-                        DL))
+                        DL, TTI))
       return false;
 
     // Only one value per case is permitted
@@ -4610,7 +4618,7 @@
   SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
   BasicBlock *DefaultDest = SI->getDefaultDest();
   GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
-                 DL);
+                 DL, TTI);
   // If the default value is not found abort unless the default destination
   // is unreachable.
   DefaultResult =
@@ -4689,7 +4697,8 @@
 /// phi nodes in a common successor block with only two different
 /// constant values, replace the switch with select.
 static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
-                           AssumptionCache *AC, const DataLayout &DL) {
+                           AssumptionCache *AC, const DataLayout &DL,
+                           const TargetTransformInfo &TTI) {
   Value *const Cond = SI->getCondition();
   PHINode *PHI = nullptr;
   BasicBlock *CommonDest = nullptr;
@@ -4697,7 +4706,7 @@
   SwitchCaseResultVectorTy UniqueResults;
   // Collect all the cases that will deliver the same value from the switch.
   if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
-                             DL))
+                             DL, TTI))
     return false;
   // Selects choose between maximum two values.
   if (UniqueResults.size() != 2)
@@ -5135,7 +5144,7 @@
     typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
     ResultsTy Results;
     if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
-                        Results, DL))
+                        Results, DL, TTI))
       return false;
 
     // Append the result from this case to the list for each phi.
@@ -5161,8 +5170,9 @@
   // If the table has holes, we need a constant result for the default case
   // or a bitmask that fits in a register.
   SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
-  bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
-                                          &CommonDest, DefaultResultsList, DL);
+  bool HasDefaultResults =
+      GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+                     DefaultResultsList, DL, TTI);
 
   bool NeedMask = (TableHasHoles && !HasDefaultResults);
   if (NeedMask) {
@@ -5458,7 +5468,7 @@
   if (EliminateDeadSwitchCases(SI, AC, DL))
     return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
 
-  if (SwitchToSelect(SI, Builder, AC, DL))
+  if (SwitchToSelect(SI, Builder, AC, DL, TTI))
     return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
 
   if (ForwardSwitchConditionToPHI(SI))
Index: llvm/trunk/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
===================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
+++ llvm/trunk/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
@@ -0,0 +1,132 @@
+; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=ropi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; RUN: opt -S -simplifycfg -mtriple=arm -relocation-model=ropi-rwpi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+
+; CHECK: @{{.*}} = private unnamed_addr constant [3 x i32] [i32 1234, i32 5678, i32 15532]
+; ENABLE: @{{.*}} = private unnamed_addr constant [3 x i32*] [i32* @c1, i32* @c2, i32* @c3]
+; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x i32*] [i32* @c1, i32* @c2, i32* @c3]
+; ENABLE: @{{.*}} = private unnamed_addr constant [3 x i32*] [i32* @g1, i32* @g2, i32* @g3]
+; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x i32*] [i32* @g1, i32* @g2, i32* @g3]
+; ENABLE: @{{.*}} = private unnamed_addr constant [3 x i32 (i32, i32)*] [i32 (i32, i32)* @f1, i32 (i32, i32)* @f2, i32 (i32, i32)* @f3]
+; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x i32 (i32, i32)*] [i32 (i32, i32)* @f1, i32 (i32, i32)* @f2, i32 (i32, i32)* @f3]
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7a--none-eabi"
+
+define i32 @test1(i32 %n) {
+entry:
+  switch i32 %n, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 15498, %sw.default ], [ 15532, %sw.bb2 ], [ 5678, %sw.bb1 ], [ 1234, %sw.bb ]
+  ret i32 %retval.0
+}
+
+@c1 = external constant i32, align 4
+@c2 = external constant i32, align 4
+@c3 = external constant i32, align 4
+@c4 = external constant i32, align 4
+
+
+define i32* @test2(i32 %n) {
+entry:
+  switch i32 %n, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @c4, %sw.default ], [ @c3, %sw.bb2 ], [ @c2, %sw.bb1 ], [ @c1, %sw.bb ]
+  ret i32* %retval.0
+}
+
+@g1 = external global i32, align 4
+@g2 = external global i32, align 4
+@g3 = external global i32, align 4
+@g4 = external global i32, align 4
+
+define i32* @test3(i32 %n) {
+entry:
+  switch i32 %n, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @g4, %sw.default ], [ @g3, %sw.bb2 ], [ @g2, %sw.bb1 ], [ @g1, %sw.bb ]
+  ret i32* %retval.0
+}
+
+declare i32 @f1(i32, i32)
+declare i32 @f2(i32, i32)
+declare i32 @f3(i32, i32)
+declare i32 @f4(i32, i32)
+declare i32 @f5(i32, i32)
+
+define i32 @test4(i32 %a, i32 %b, i32 %c) {
+entry:
+  %cmp = icmp eq i32 %a, 1
+  br i1 %cmp, label %cond.end11, label %cond.false
+
+cond.false:
+  %cmp1 = icmp eq i32 %a, 2
+  br i1 %cmp1, label %cond.end11, label %cond.false3
+
+cond.false3:
+  %cmp4 = icmp eq i32 %a, 3
+  br i1 %cmp4, label %cond.end11, label %cond.false6
+
+cond.false6:
+  %cmp7 = icmp eq i32 %a, 4
+  %cond = select i1 %cmp7, i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
+  br label %cond.end11
+
+cond.end11:
+  %cond12 = phi i32 (i32, i32)* [ @f1, %entry ], [ @f2, %cond.false ], [ %cond, %cond.false6 ], [ @f3, %cond.false3 ]
+  %call = call i32 %cond12(i32 %b, i32 %c) #2
+  ret i32 %call
+}