Index: include/llvm/IR/BasicBlock.h =================================================================== --- include/llvm/IR/BasicBlock.h +++ include/llvm/IR/BasicBlock.h @@ -315,6 +315,9 @@ LandingPadInst *getLandingPadInst(); const LandingPadInst *getLandingPadInst() const; + /// Checks if the bodies of two basic blocks are identical. + bool isIdenticalTo(const BasicBlock &Other) const; + private: /// \brief Increment the internal refcount of the number of BlockAddresses /// referencing this BasicBlock by \p Amt. Index: lib/IR/BasicBlock.cpp =================================================================== --- lib/IR/BasicBlock.cpp +++ lib/IR/BasicBlock.cpp @@ -431,3 +431,26 @@ const LandingPadInst *BasicBlock::getLandingPadInst() const { return dyn_cast(getFirstNonPHI()); } + +bool BasicBlock::isIdenticalTo(const BasicBlock &Other) const { + // Two blocks cannot be equivalent if they are different sizes. + if (this->size() != Other.size()) + return false; + + auto Cur1 = this->begin(); + auto Cur2 = Other.begin(); + + while(Cur1 != this->end()) { + + if (!(*Cur1).isIdenticalTo(&*Cur2)) + return false; + + ++Cur1; + ++Cur2; + } + + assert(Cur1 == this->end() && Cur2 == Other.end() && + "we should've handled this earlier"); + + return true; +} Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -73,6 +73,10 @@ "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); +static cl::opt KeepDuplicateBlocks( + "simplifycfg-keep-duplicate-blocks", cl::Hidden, cl::init(false), + cl::desc("Keep duplicated switch case basic blocks")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); @@ -3396,6 +3400,45 @@ return !DeadCases.empty(); } +/// Attempts to remove redundant duplicate case successors. +/// +/// We only check adjacent case values because they are the most +/// likely to be the same in real-world code, and it avoids the +/// O(n^2) complexity of checking every case. +/// +/// Returns `true` if the instruction was modified. +static bool EliminateDuplicateSwitchSuccessors(SwitchInst *SI, AssumptionCache *AC, + const DataLayout &DL) { + + // We don't care if the switch has one case. + if (SI->getNumCases() <= 1) + return false; + + SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); + + // Loop for every case. + while(1) { + + BasicBlock *CurSuccessor = I.getCaseSuccessor(); + ++I; + + // Check if we finished iteration. + if (I == E) + break; + + BasicBlock *NextSuccessor = I.getCaseSuccessor(); + + // If the basic blocks are identical, we can remove one because + // it is redundant. + if (CurSuccessor->isIdenticalTo(*NextSuccessor)) { + // Throw away one of the successors. + I.setSuccessor(CurSuccessor); + } + } + + return false; +} + /// If BB would be eligible for simplification by /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated /// by an unconditional branch), look at the phi node for BB in the successor @@ -4412,6 +4455,9 @@ if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (!KeepDuplicateBlocks && EliminateDuplicateSwitchSuccessors(SI, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (SwitchToLookupTable(SI, Builder, DL, TTI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; Index: test/CodeGen/AArch64/arm64-jumptable.ll =================================================================== --- test/CodeGen/AArch64/arm64-jumptable.ll +++ test/CodeGen/AArch64/arm64-jumptable.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=false < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -aarch64-atomic-cfg-tidy=false < %s | FileCheck %s --check-prefix=CHECK-LINUX ; define void @sum(i32* %to) { Index: test/CodeGen/ARM/jump-table-islands-split.ll =================================================================== --- test/CodeGen/ARM/jump-table-islands-split.ll +++ test/CodeGen/ARM/jump-table-islands-split.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv7s-apple-ios8.0 -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv7s-apple-ios8.0 -arm-atomic-cfg-tidy=false -o - %s | FileCheck %s declare void @foo(double) declare i32 @llvm.arm.space(i32, i32) Index: test/Transforms/SimplifyCFG/CoveredLookupTable.ll =================================================================== --- test/Transforms/SimplifyCFG/CoveredLookupTable.ll +++ test/Transforms/SimplifyCFG/CoveredLookupTable.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S %s | FileCheck %s +; RUN: opt -simplifycfg -S %s -simplifycfg-keep-duplicate-blocks | FileCheck %s ; rdar://15268442 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" Index: test/Transforms/SimplifyCFG/X86/switch-table-bug.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch-table-bug.ll +++ test/Transforms/SimplifyCFG/X86/switch-table-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -simplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s +; RUN: opt -S -simplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 -simplifycfg-keep-duplicate-blocks | FileCheck %s ; rdar://17735071 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" Index: test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplifycfg -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: opt < %s -simplifycfg -S -mtriple=x86_64-unknown-linux-gnu -simplifycfg-keep-duplicate-blocks | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/SimplifyCFG/switch-duplicate-successors.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/switch-duplicate-successors.ll @@ -0,0 +1,84 @@ +; RUN: opt %s -S -simplifycfg | FileCheck %s + +declare void @foo() + +; This test checks that SimplifyCFG correctly recognizes +; that switch arms point to duplicate basic blocks, and +; removes references to all all unneeded blocks. +define void @main() { +; CHECK-LABEL: @main + +; CHECK: switch i32 %1, label %join [ + +; CHECK-NEXT: i32 1, label %match_case +; CHECK-NEXT: i32 48, label %match_case +; CHECK-NEXT: i32 92, label %match_case +; CHECK-NEXT: i32 23, label %match_case +; CHECK-NEXT: i32 4, label %match_case +; CHECK-NEXT: i32 70, label %match_case +; CHECK-NEXT: i32 29, label %match_case +; CHECK-NEXT: i32 36, label %match_case +; CHECK-NEXT: i32 34, label %match_case +; CHECK-NEXT: i32 58, label %match_case + + %value = alloca i32 + store i32 5, i32* %value + %1 = load i32, i32* %value + + switch i32 %1, label %join [ + i32 1, label %match_case + i32 48, label %match_case1 + i32 92, label %match_case2 + i32 23, label %match_case3 + i32 4, label %match_case4 + i32 70, label %match_case5 + i32 29, label %match_case6 + i32 36, label %match_case7 + i32 34, label %match_case8 + i32 58, label %match_case9 + ] + +match_case: + call void @foo() + unreachable + +match_case1: + call void @foo() + unreachable + +match_case2: + call void @foo() + unreachable + +match_case3: + call void @foo() + unreachable + +match_case4: + call void @foo() + unreachable + +match_case5: + call void @foo() + unreachable + +match_case6: + call void @foo() + unreachable + +match_case7: + call void @foo() + unreachable + +match_case8: + call void @foo() + unreachable + +match_case9: + call void @foo() + unreachable + +join: + ret void +} +