Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
@@ -175,6 +175,7 @@
     bool optimizeExtUses(Instruction *I);
     bool optimizeSelectInst(SelectInst *SI);
     bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+    bool optimizeSwitchInst(SwitchInst *SI);
     bool optimizeExtractElementInst(Instruction *Inst);
     bool dupRetToEnableTailCallOpts(BasicBlock *BB);
     bool placeDbgValues(Function &F);
@@ -4399,6 +4400,53 @@
   return MadeChange;
 }
 
+/// Widen the condition and case values of \p SI to the target's register
+/// width when that register is wider than the condition type, so the
+/// per-case comparisons do not each need their own extension.
+/// \returns true if the switch was rewritten, false otherwise.
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+  if (!TLI || !DL)
+    return false;
+
+  Value *Cond = SI->getCondition();
+  Type *OldType = Cond->getType();
+  LLVMContext &Context = Cond->getContext();
+  MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+  unsigned RegWidth = RegType.getSizeInBits();
+
+  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+    return false;
+
+  // If the register width is greater than the type width, expand the condition
+  // of the switch instruction and each case constant to the width of the
+  // register. By widening the type of the switch condition, subsequent
+  // comparisons (for case comparisons) will not need to be extended to the
+  // preferred register width, so we will potentially eliminate N-1 extends,
+  // where N is the number of cases in the switch.
+  auto *NewType = Type::getIntNTy(Context, RegWidth);
+
+  // Zero-extend the switch condition and case constants unless the switch
+  // condition is a function argument that is already being sign-extended.
+  // In that case, we can avoid an unnecessary mask/extension by sign-extending
+  // everything instead.
+  Instruction::CastOps ExtType = Instruction::ZExt;
+  if (auto *Arg = dyn_cast<Argument>(Cond))
+    if (Arg->hasSExtAttr())
+      ExtType = Instruction::SExt;
+
+  auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
+  ExtInst->insertBefore(SI);
+  SI->setCondition(ExtInst);
+  for (SwitchInst::CaseIt Case : SI->cases()) {
+    APInt NarrowConst = Case.getCaseValue()->getValue();
+    APInt WideConst = (ExtType == Instruction::ZExt) ?
+                      NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
+    Case.setValue(ConstantInt::get(Context, WideConst));
+  }
+
+  return true;
+}
+
 namespace {
 /// \brief Helper class to promote a scalar operation to a vector one.
 /// This class is used to move downward extractelement transition.
@@ -4871,6 +4919,9 @@
   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
     return optimizeShuffleVectorInst(SVI);
 
+  if (auto *Switch = dyn_cast<SwitchInst>(I))
+    return optimizeSwitchInst(Switch);
+
   if (isa<ExtractElementInst>(I))
     return optimizeExtractElementInst(I);
 
Index: llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll
+++ llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll
@@ -0,0 +1,107 @@
+;; PowerPC is arbitrarily chosen as a 32/64-bit RISC representative to show the transform in all tests.
+;; x86 is chosen to show that the transform may differ when 8-bit and 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefix=PPC --check-prefix=ALL
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86 --check-prefix=ALL
+
+; PPC widens to 32-bit; no change for x86 because 16-bit registers are part of the architecture.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+  %trunc = trunc i32 %a to i16
+  switch i16 %trunc, label %sw.default [
+    i16 1, label %sw.bb0
+    i16 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; ALL-LABEL: @widen_switch_i16(
+; PPC: %0 = zext i16 %trunc to i32
+; PPC-NEXT: switch i32 %0, label %sw.default [
+; PPC-NEXT: i32 1, label %return
+; PPC-NEXT: i32 65535, label %sw.bb1
+;
+; X86: %trunc = trunc i32 %a to i16
+; X86-NEXT: switch i16 %trunc, label %sw.default [
+; X86-NEXT: i16 1, label %return
+; X86-NEXT: i16 -1, label %sw.bb1
+}
+
+; Both architectures widen i17 to 32-bit because i17 is a smaller, non-native type on each of them.
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+  %trunc = trunc i32 %a to i17
+  switch i17 %trunc, label %sw.default [
+    i17 10, label %sw.bb0
+    i17 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; ALL-LABEL: @widen_switch_i17(
+; ALL: %0 = zext i17 %trunc to i32
+; ALL-NEXT: switch i32 %0, label %sw.default [
+; ALL-NEXT: i32 10, label %return
+; ALL-NEXT: i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+; NOTE(review): despite the "i16" in its name, this test switches on an i2 signext argument.
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+  switch i2 %a, label %sw.default [
+    i2 1, label %sw.bb0
+    i2 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; ALL-LABEL: @widen_switch_i16_sext(
+; PPC: %0 = sext i2 %a to i32
+; PPC-NEXT: switch i32 %0, label %sw.default [
+; PPC-NEXT: i32 1, label %return
+; PPC-NEXT: i32 -1, label %sw.bb1
+;
+; X86: %0 = sext i2 %a to i8
+; X86-NEXT: switch i8 %0, label %sw.default [
+; X86-NEXT: i8 1, label %return
+; X86-NEXT: i8 -1, label %sw.bb1
+}
+