Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -174,6 +174,7 @@
     bool optimizeExtUses(Instruction *I);
     bool optimizeSelectInst(SelectInst *SI);
     bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+    bool optimizeSwitchInst(SwitchInst *SI);
     bool optimizeExtractElementInst(Instruction *Inst);
     bool dupRetToEnableTailCallOpts(BasicBlock *BB);
     bool placeDbgValues(Function &F);
@@ -3992,6 +3993,43 @@
   return MadeChange;
 }
 
+/// \brief Widen the condition of a switch to the target's register width.
+///
+/// Returns true if \p SI was rewritten, and false when target information is
+/// unavailable or the condition is already at least register-wide.
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+  if (!TLI || !DL)
+    return false;
+
+  Value *Cond = SI->getCondition();
+  Type *OldType = Cond->getType();
+  LLVMContext &Context = Cond->getContext();
+  MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+  unsigned RegWidth = RegType.getSizeInBits();
+
+  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+    return false;
+
+  // If the register width is greater than the type width, expand the condition
+  // of the switch instruction and each case constant to the width of the
+  // register. By widening the type of the switch condition, subsequent
+  // comparisons (for case comparisons) will not need to be extended to the
+  // preferred register width, so we will potentially eliminate N-1 extends,
+  // where N is the number of cases in the switch.
+  IntegerType *NewType = Type::getIntNTy(Context, RegWidth);
+  CastInst *Zext = CastInst::CreateZExtOrBitCast(Cond, NewType);
+  Zext->insertBefore(SI);
+  SI->setCondition(Zext);
+  // Case constants must be extended the same way as the condition (zero
+  // extension) so that every case still matches the same values.
+  for (SwitchInst::CaseIt Case : SI->cases()) {
+    APInt WiderCaseConst = Case.getCaseValue()->getValue().zext(RegWidth);
+    Case.setValue(ConstantInt::get(Context, WiderCaseConst));
+  }
+
+  return true;
+}
+
 namespace {
 /// \brief Helper class to promote a scalar operation to a vector one.
 /// This class is used to move downward extractelement transition.
@@ -4464,6 +4502,9 @@
   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
     return optimizeShuffleVectorInst(SVI);
 
+  if (auto *Switch = dyn_cast<SwitchInst>(I))
+    return optimizeSwitchInst(Switch);
+
   if (isa<ExtractElementInst>(I))
     return optimizeExtractElementInst(I);
 
Index: test/Transforms/CodeGenPrepare/widen_switch.ll
===================================================================
--- test/Transforms/CodeGenPrepare/widen_switch.ll
+++ test/Transforms/CodeGenPrepare/widen_switch.ll
@@ -0,0 +1,125 @@
+;; PowerPC is arbitrarily chosen as a 32/64-bit RISC representative to show the transform in both tests.
+;; x86 is chosen to show that there is no transform in the 16-bit test if 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefix=PPC --check-prefix=ALL
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86 --check-prefix=ALL
+
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+  %trunc = trunc i32 %a to i16
+  switch i16 %trunc, label %sw.default [
+    i16 1, label %sw.bb0
+    i16 10, label %sw.bb1
+    i16 100, label %sw.bb2
+    i16 1000, label %sw.bb3
+    i16 -1, label %sw.bb4
+    i16 -10, label %sw.bb5
+    i16 -100, label %sw.bb6
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.bb3:
+  br label %return
+
+sw.bb4:
+  br label %return
+
+sw.bb5:
+  br label %return
+
+sw.bb6:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ], [ 2, %sw.bb2 ], [ 3, %sw.bb3 ], [ 4, %sw.bb4 ], [ 5, %sw.bb5 ], [ 6, %sw.bb6 ]
+  ret i32 %retval
+
+; ALL-LABEL: @widen_switch_i16(
+; PPC:       %0 = zext i16 %trunc to i32
+; PPC-NEXT:  switch i32 %0, label %sw.default [
+; PPC-NEXT:    i32 1, label %return
+; PPC-NEXT:    i32 10, label %sw.bb1
+; PPC-NEXT:    i32 100, label %sw.bb2
+; PPC-NEXT:    i32 1000, label %sw.bb3
+; PPC-NEXT:    i32 65535, label %sw.bb4
+; PPC-NEXT:    i32 65526, label %sw.bb5
+; PPC-NEXT:    i32 65436, label %sw.bb6
+;
+; X86:       %trunc = trunc i32 %a to i16
+; X86-NEXT:  switch i16 %trunc, label %sw.default [
+; X86-NEXT:    i16 1, label %return
+; X86-NEXT:    i16 10, label %sw.bb1
+; X86-NEXT:    i16 100, label %sw.bb2
+; X86-NEXT:    i16 1000, label %sw.bb3
+; X86-NEXT:    i16 -1, label %sw.bb4
+; X86-NEXT:    i16 -10, label %sw.bb5
+; X86-NEXT:    i16 -100, label %sw.bb6
+}
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+  %trunc = trunc i32 %a to i17
+  switch i17 %trunc, label %sw.default [
+    i17 10, label %sw.bb0
+    i17 100, label %sw.bb1
+    i17 1000, label %sw.bb2
+    i17 10000, label %sw.bb3
+    i17 -1, label %sw.bb4
+    i17 -2, label %sw.bb5
+    i17 -3, label %sw.bb6
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.bb3:
+  br label %return
+
+sw.bb4:
+  br label %return
+
+sw.bb5:
+  br label %return
+
+sw.bb6:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ], [ 2, %sw.bb2 ], [ 3, %sw.bb3 ], [ 4, %sw.bb4 ], [ 5, %sw.bb5 ], [ 6, %sw.bb6 ]
+  ret i32 %retval
+
+; ALL-LABEL: @widen_switch_i17(
+; ALL:       %0 = zext i17 %trunc to i32
+; ALL-NEXT:  switch i32 %0, label %sw.default [
+; ALL-NEXT:    i32 10, label %return
+; ALL-NEXT:    i32 100, label %sw.bb1
+; ALL-NEXT:    i32 1000, label %sw.bb2
+; ALL-NEXT:    i32 10000, label %sw.bb3
+; ALL-NEXT:    i32 131071, label %sw.bb4
+; ALL-NEXT:    i32 131070, label %sw.bb5
+; ALL-NEXT:    i32 131069, label %sw.bb6
+
+
+}
+