diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1222,6 +1222,14 @@ uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const; + /// Return a preferred type for switch conditions. Return `nullptr` + /// when no change should be performed. `ExtOp` is set to + /// `Instruction::SExt` or `Instruction::ZExt` when not returning `nullptr`. + /// By default this returns `getRegisterType` when it is bigger than the + /// current type. + virtual IntegerType *shouldExtendSwitch(const SwitchInst &SI, + Instruction::CastOps *ExtOp) const; + /// Return true if lowering to a bit test is suitable for a set of case /// clusters which contains \p NumDests unique destinations, \p Low and /// \p High as its lowest and highest case values, and expects \p NumCmps diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6980,49 +6980,22 @@ } bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { - Value *Cond = SI->getCondition(); - Type *OldType = Cond->getType(); - LLVMContext &Context = Cond->getContext(); - EVT OldVT = TLI->getValueType(*DL, OldType); - MVT RegType = TLI->getRegisterType(Context, OldVT); - unsigned RegWidth = RegType.getSizeInBits(); - - if (RegWidth <= cast(OldType)->getBitWidth()) + Instruction::CastOps ExtOp; + IntegerType *ExtType = TLI->shouldExtendSwitch(*SI, &ExtOp); + if (!ExtType) return false; - // If the register width is greater than the type width, expand the condition - // of the switch instruction and each case constant to the width of the - // register. 
By widening the type of the switch condition, subsequent - // comparisons (for case comparisons) will not need to be extended to the - // preferred register width, so we will potentially eliminate N-1 extends, - // where N is the number of cases in the switch. - auto *NewType = Type::getIntNTy(Context, RegWidth); - - // Extend the switch condition and case constants using the target preferred - // extend unless the switch condition is a function argument with an extend - // attribute. In that case, we can avoid an unnecessary mask/extension by - // matching the argument extension instead. - Instruction::CastOps ExtType = Instruction::ZExt; - // Some targets prefer SExt over ZExt. - if (TLI->isSExtCheaperThanZExt(OldVT, RegType)) - ExtType = Instruction::SExt; - - if (auto *Arg = dyn_cast(Cond)) { - if (Arg->hasSExtAttr()) - ExtType = Instruction::SExt; - if (Arg->hasZExtAttr()) - ExtType = Instruction::ZExt; - } - - auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); + Value *Condition = SI->getCondition(); + Instruction *ExtInst = CastInst::Create(ExtOp, Condition, ExtType); ExtInst->insertBefore(SI); ExtInst->setDebugLoc(SI->getDebugLoc()); SI->setCondition(ExtInst); for (auto Case : SI->cases()) { APInt NarrowConst = Case.getCaseValue()->getValue(); - APInt WideConst = (ExtType == Instruction::ZExt) ? - NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); - Case.setValue(ConstantInt::get(Context, WideConst)); + unsigned Width = ExtType->getBitWidth(); + APInt WideConst = (ExtOp == Instruction::ZExt) ? 
NarrowConst.zext(Width) + : NarrowConst.sext(Width); + Case.setValue(ConstantInt::get(ExtType->getContext(), WideConst)); } return true; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1602,6 +1602,47 @@ (NumCases * 100 >= Range * MinDensity); } +IntegerType * +TargetLoweringBase::shouldExtendSwitch(const SwitchInst &SI, + Instruction::CastOps *ExtOp) const { + Value *Cond = SI.getCondition(); + Type *CondType = Cond->getType(); + LLVMContext &Context = Cond->getContext(); + const Module *M = SI.getParent()->getParent()->getParent(); + const DataLayout &DL = M->getDataLayout(); + EVT OldVT = getValueType(DL, CondType); + MVT RegType = getRegisterType(Context, OldVT); + unsigned RegWidth = RegType.getSizeInBits(); + + if (RegWidth <= CondType->getIntegerBitWidth()) + return nullptr; + + // If the register width is greater than the type width, expand the condition + // of the switch instruction and each case constant to the width of the + // register. By widening the type of the switch condition, subsequent + // comparisons (for case comparisons) will not need to be extended to the + // preferred register width, so we will potentially eliminate N-1 extends, + // where N is the number of cases in the switch. + IntegerType *ExtType = Type::getIntNTy(Context, RegWidth); + + // Extend the switch condition and case constants using the target preferred + // extend unless the switch condition is a function argument with an extend + // attribute. In that case, we can avoid an unnecessary mask/extension by + // matching the argument extension instead. + *ExtOp = Instruction::ZExt; + // Some targets prefer SExt over ZExt. 
+  if (isSExtCheaperThanZExt(OldVT, RegType))
+    *ExtOp = Instruction::SExt;
+
+  if (Argument *Arg = dyn_cast<Argument>(Cond)) {
+    if (Arg->hasSExtAttr())
+      *ExtOp = Instruction::SExt;
+    if (Arg->hasZExtAttr())
+      *ExtOp = Instruction::ZExt;
+  }
+  return ExtType;
+}
+
 /// Get the EVTs and ArgFlags collections that represent the legalized return
 /// type of the given function. This does not require a DAG or a return value,
 /// and is suitable for use before any DAGs for the function are constructed.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1316,6 +1316,9 @@
   /// Returns true if lowering to a jump table is allowed.
   bool areJTsAllowed(const Function *Fn) const override;
 
+  IntegerType *shouldExtendSwitch(const SwitchInst &SI,
+                                  Instruction::CastOps *ExtOp) const override;
+
   /// If true, then instruction selection should
   /// seek to shrink the FP constant of the specified type to a smaller type
   /// in order to save space and / or reduce runtime.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33719,6 +33719,32 @@
   return TargetLowering::areJTsAllowed(Fn);
 }
 
+static Instruction::CastOps getPreferredExtOp(Value* Value) {
+  if (Argument *Arg = dyn_cast<Argument>(Value)) {
+    if (Arg->hasSExtAttr())
+      return Instruction::SExt;
+  }
+  return Instruction::ZExt;
+}
+
+IntegerType* X86TargetLowering::shouldExtendSwitch(const SwitchInst &SI,
+                                                   Instruction::CastOps *ExtOp) const {
+  // The switch operand will be part of an address computation, so we prefer
+  // 32 or 64bit conditions.
+ Value* Condition = SI.getCondition(); + Type* ConditionType = Condition->getType(); + unsigned BitWidth = ConditionType->getIntegerBitWidth(); + if (BitWidth < 32) { + *ExtOp = getPreferredExtOp(Condition); + return Type::getInt32Ty(ConditionType->getContext()); + } + if (Subtarget.is64Bit() && BitWidth > 32 && BitWidth < 64) { + *ExtOp = getPreferredExtOp(Condition); + return Type::getInt64Ty(ConditionType->getContext()); + } + return nullptr; +} + //===----------------------------------------------------------------------===// // X86 Scheduler Hooks //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll --- a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll +++ b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll @@ -18,11 +18,11 @@ ; CHECK-NEXT: LBB0_2: ## %bb1 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movzbl (%eax), %ecx -; CHECK-NEXT: cmpb $12, %cl +; CHECK-NEXT: cmpl $12, %ecx ; CHECK-NEXT: je LBB0_1 ; CHECK-NEXT: ## %bb.3: ## %bb1 ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: cmpb $42, %cl +; CHECK-NEXT: cmpl $42, %ecx ; CHECK-NEXT: je LBB0_1 ; CHECK-NEXT: ## %bb.4: ## %bb3 ; CHECK-NEXT: movb $4, 2(%eax) diff --git a/llvm/test/CodeGen/X86/SwitchLowering.ll b/llvm/test/CodeGen/X86/SwitchLowering.ll --- a/llvm/test/CodeGen/X86/SwitchLowering.ll +++ b/llvm/test/CodeGen/X86/SwitchLowering.ll @@ -18,11 +18,11 @@ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movzbl (%esi,%edi), %eax ; CHECK-NEXT: incl %edi -; CHECK-NEXT: cmpb $120, %al +; CHECK-NEXT: cmpl $120, %eax ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.2: # %bb ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: .LBB0_3: # %bb7 ; CHECK-NEXT: movzbl %al, %eax diff --git 
a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll --- a/llvm/test/CodeGen/X86/conditional-tailcall.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll @@ -248,7 +248,7 @@ ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x20] ; CHECK32-NEXT: movl (%eax), %eax # encoding: [0x8b,0x00] ; CHECK32-NEXT: movl -24(%eax), %edx # encoding: [0x8b,0x50,0xe8] -; CHECK32-NEXT: leal (%eax,%edx), %ebp # encoding: [0x8d,0x2c,0x10] +; CHECK32-NEXT: leal (%eax,%edx), %ecx # encoding: [0x8d,0x0c,0x10] ; CHECK32-NEXT: xorl %ebx, %ebx # encoding: [0x31,0xdb] ; CHECK32-NEXT: pushl $2 # encoding: [0x6a,0x02] ; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 @@ -259,76 +259,80 @@ ; CHECK32-NEXT: .LBB3_1: # %for.cond ; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2] -; CHECK32-NEXT: je .LBB3_13 # encoding: [0x74,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1 +; CHECK32-NEXT: je .LBB3_14 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1 ; CHECK32-NEXT: # %bb.2: # %for.body ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] -; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK32-NEXT: je .LBB3_12 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 ; CHECK32-NEXT: # %bb.3: # %for.body ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: cmpl $1, %ebx # encoding: [0x83,0xfb,0x01] -; CHECK32-NEXT: je .LBB3_9 # encoding: [0x74,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; CHECK32-NEXT: # %bb.4: # %for.body ; CHECK32-NEXT: # in Loop: Header=BB3_1 
Depth=1 ; CHECK32-NEXT: testl %ebx, %ebx # encoding: [0x85,0xdb] -; CHECK32-NEXT: jne .LBB3_10 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK32-NEXT: jne .LBB3_11 # encoding: [0x75,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; CHECK32-NEXT: # %bb.5: # %sw.bb ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08] -; CHECK32-NEXT: cmpl $43, %ecx # encoding: [0x83,0xf9,0x2b] +; CHECK32-NEXT: movzbl (%eax), %ebp # encoding: [0x0f,0xb6,0x28] +; CHECK32-NEXT: cmpl $43, %ebp # encoding: [0x83,0xfd,0x2b] ; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb] -; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; CHECK32-NEXT: # %bb.6: # %sw.bb ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; CHECK32-NEXT: cmpb $45, %cl # encoding: [0x80,0xf9,0x2d] +; CHECK32-NEXT: cmpl $45, %ebp # encoding: [0x83,0xfd,0x2d] ; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb] -; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK32-NEXT: jmp .LBB3_7 # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_11: # %sw.bb22 +; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.7: # %if.else ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08] -; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] -; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] -; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3] -; CHECK32-NEXT: jb .LBB3_10 # 
encoding: [0x72,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK32-NEXT: jmp .LBB3_12 # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_9: # %sw.bb14 -; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08] -; CHECK32-NEXT: .LBB3_7: # %if.else -; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] -; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] -; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3] -; CHECK32-NEXT: jae .LBB3_8 # encoding: [0x73,A] +; CHECK32-NEXT: addl $-48, %ebp # encoding: [0x83,0xc5,0xd0] +; CHECK32-NEXT: cmpl $10, %ebp # encoding: [0x83,0xfd,0x0a] +; CHECK32-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] ; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_10: # %for.inc +; CHECK32-NEXT: .LBB3_12: # %sw.bb22 +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: movzbl (%eax), %ebx # encoding: [0x0f,0xb6,0x18] +; CHECK32-NEXT: addl $-48, %ebx # encoding: [0x83,0xc3,0xd0] +; CHECK32-NEXT: cmpl $10, %ebx # encoding: [0x83,0xfb,0x0a] +; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3] +; CHECK32-NEXT: jb .LBB3_11 # encoding: [0x72,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK32-NEXT: jmp .LBB3_13 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_10: # %sw.bb14 +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: movzbl (%eax), %ebx # encoding: [0x0f,0xb6,0x18] +; CHECK32-NEXT: addl $-48, %ebx # encoding: [0x83,0xc3,0xd0] +; CHECK32-NEXT: cmpl $10, %ebx # encoding: [0x83,0xfb,0x0a] +; CHECK32-NEXT: .LBB3_8: # %if.else +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3] +; CHECK32-NEXT: 
jae .LBB3_9 # encoding: [0x73,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_11: # %for.inc ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: incl %eax # encoding: [0x40] ; CHECK32-NEXT: decl %edx # encoding: [0x4a] ; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] ; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_13: +; CHECK32-NEXT: .LBB3_14: ; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] ; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] -; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_8: +; CHECK32-NEXT: jmp .LBB3_15 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_9: ; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; CHECK32-NEXT: .LBB3_14: # %cleanup.thread +; CHECK32-NEXT: .LBB3_15: # %cleanup.thread ; CHECK32-NEXT: # kill: def $al killed $al killed $eax ; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c] ; CHECK32-NEXT: .cfi_def_cfa_offset 20 -; CHECK32-NEXT: .LBB3_15: # %cleanup.thread +; CHECK32-NEXT: .LBB3_16: # %cleanup.thread ; CHECK32-NEXT: popl %esi # encoding: [0x5e] ; CHECK32-NEXT: .cfi_def_cfa_offset 16 ; CHECK32-NEXT: popl %edi # encoding: [0x5f] @@ -338,11 +342,11 @@ ; CHECK32-NEXT: popl %ebp # encoding: [0x5d] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 ; CHECK32-NEXT: retl # encoding: [0xc3] -; CHECK32-NEXT: .LBB3_12: # %if.else28 +; CHECK32-NEXT: .LBB3_13: # %if.else28 ; CHECK32-NEXT: .cfi_def_cfa_offset 32 ; CHECK32-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] ; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK32-NEXT: pushl %ebp # encoding: [0x55] +; CHECK32-NEXT: pushl %ecx # encoding: [0x51] ; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK32-NEXT: pushl %eax # encoding: [0x50] ; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 @@ -350,8 
+354,8 @@ ; CHECK32-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-4, kind: FK_PCRel_4 ; CHECK32-NEXT: addl $28, %esp # encoding: [0x83,0xc4,0x1c] ; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 -; CHECK32-NEXT: jmp .LBB3_15 # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1 +; CHECK32-NEXT: jmp .LBB3_16 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_16-1, kind: FK_PCRel_1 ; ; CHECK64-LABEL: pr31257: ; CHECK64: # %bb.0: # %entry @@ -396,7 +400,7 @@ ; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; CHECK64-NEXT: # %bb.6: # %sw.bb ; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d] +; CHECK64-NEXT: cmpl $45, %edx # encoding: [0x83,0xfa,0x2d] ; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] ; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 @@ -478,7 +482,7 @@ ; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; WIN64-NEXT: # %bb.6: # %sw.bb ; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d] +; WIN64-NEXT: cmpl $45, %r9d # encoding: [0x41,0x83,0xf9,0x2d] ; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A] ; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll --- a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -97,27 +97,27 @@ define void @switch_trunc_phi_const(i32 %x) { ; CHECK-LABEL: switch_trunc_phi_const: ; CHECK: # %bb.0: # %bb0 -; CHECK-NEXT: movl $3895, %r8d # imm = 0xF37 +; CHECK-NEXT: movzbl %dil, %r8d +; CHECK-NEXT: movl $3895, %ecx # imm = 0xF37 ; CHECK-NEXT: movl $42, %esi ; CHECK-NEXT: 
movl $13, %edx -; CHECK-NEXT: movl $5, %eax -; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: decb %dil -; CHECK-NEXT: movzbl %dil, %edi -; CHECK-NEXT: cmpb $54, %dil +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: decl %r8d +; CHECK-NEXT: cmpl $54, %r8d ; CHECK-NEXT: ja .LBB1_8 ; CHECK-NEXT: # %bb.1: # %bb0 -; CHECK-NEXT: jmpq *.LJTI1_0(,%rdi,8) +; CHECK-NEXT: jmpq *.LJTI1_0(,%r8,8) ; CHECK-NEXT: .LBB1_8: # %default ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_2: # %case_1_loop -; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq $1, (%rax) -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: .LBB1_3: # %case_5 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rcx -; CHECK-NEXT: movq $5, (%rcx) -; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: movq $1, (%rcx) +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: .LBB1_3: # %case_5 +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq $5, (%rax) +; CHECK-NEXT: movq %rdi, %rdx ; CHECK-NEXT: .LBB1_4: # %case_13 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq $13, (%rax) @@ -125,15 +125,15 @@ ; CHECK-NEXT: .LBB1_5: # %case_42 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq %rsi, (%rax) -; CHECK-NEXT: movl $55, %r8d +; CHECK-NEXT: movl $55, %ecx ; CHECK-NEXT: .LBB1_6: # %case_55 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq %r8, (%rax) +; CHECK-NEXT: movq %rcx, (%rax) ; CHECK-NEXT: .LBB1_7: # %case_7 ; CHECK-NEXT: movq g64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq (%rax), %rcx -; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq $7, (%rax) +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movq $7, (%rcx) ; CHECK-NEXT: jmp .LBB1_2 bb0: %x_trunc = trunc i32 %x to i8 diff --git a/llvm/test/CodeGen/X86/switch.ll b/llvm/test/CodeGen/X86/switch.ll --- a/llvm/test/CodeGen/X86/switch.ll +++ b/llvm/test/CodeGen/X86/switch.ll @@ -1417,8 +1417,8 @@ define void 
@int_max_table_cluster(i8 %x) { ; CHECK-LABEL: int_max_table_cluster: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addb $64, %dil -; CHECK-NEXT: cmpb $-65, %dil +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: cmpb $-9, %dil ; CHECK-NEXT: ja .LBB15_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movzbl %dil, %eax @@ -2445,7 +2445,6 @@ define void @pr23738(i4 %x) { ; CHECK-LABEL: pr23738: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: andb $15, %al ; CHECK-NEXT: cmpb $11, %al @@ -2453,7 +2452,7 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: andl $15, %edi ; CHECK-NEXT: movl $2051, %eax # imm = 0x803 -; CHECK-NEXT: btq %rdi, %rax +; CHECK-NEXT: btl %edi, %eax ; CHECK-NEXT: jae .LBB23_2 ; CHECK-NEXT: # %bb.3: # %bb1 ; CHECK-NEXT: movl $1, %edi @@ -2651,21 +2650,20 @@ ; CHECK-LABEL: switch_i8: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: andb $127, %dil +; CHECK-NEXT: andl $127, %edi ; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: cmpb $8, %al +; CHECK-NEXT: cmpl $8, %eax ; CHECK-NEXT: ja .LBB26_1 ; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: jmpq *.LJTI26_0(,%rax,8) ; CHECK-NEXT: .LBB26_4: # %bb0 ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: jmp g@PLT # TAILCALL ; CHECK-NEXT: .LBB26_1: -; CHECK-NEXT: cmpb $13, %dil +; CHECK-NEXT: cmpl $13, %edi ; CHECK-NEXT: je .LBB26_8 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: cmpb $42, %dil +; CHECK-NEXT: cmpl $42, %edi ; CHECK-NEXT: jne .LBB26_9 ; CHECK-NEXT: # %bb.3: # %bb5 ; CHECK-NEXT: movl $5, %edi diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll --- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll +++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll @@ -139,11 +139,10 @@ ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movzbl (%rdi), 
%ecx -; CHECK-NEXT: decb %cl -; CHECK-NEXT: cmpb $5, %cl +; CHECK-NEXT: decl %ecx +; CHECK-NEXT: cmpl $5, %ecx ; CHECK-NEXT: ja .LBB1_9 ; CHECK-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: movzbl %cl, %ecx ; CHECK-NEXT: jmpq *.LJTI1_0(,%rcx,8) ; CHECK-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: incl %eax diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -286,22 +286,22 @@ ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: .LBB3_10: # %bb2.i3 ; CHECK-NEXT: movq 8(%rax), %rax -; CHECK-NEXT: movb 16(%rax), %cl +; CHECK-NEXT: movzbl 16(%rax), %ecx ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: cmpl $23, %ecx ; CHECK-NEXT: je .LBB3_16 ; CHECK-NEXT: # %bb.11: # %bb2.i3 -; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: cmpl $16, %ecx ; CHECK-NEXT: je .LBB3_16 ; CHECK-NEXT: jmp .LBB3_9 ; CHECK-NEXT: .LBB3_13: # %bb2.i.i2 ; CHECK-NEXT: movq 8(%rax), %rax -; CHECK-NEXT: movb 16(%rax), %cl +; CHECK-NEXT: movzbl 16(%rax), %ecx ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: cmpl $16, %ecx ; CHECK-NEXT: je .LBB3_16 ; CHECK-NEXT: # %bb.14: # %bb2.i.i2 -; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: cmpl $23, %ecx ; CHECK-NEXT: je .LBB3_16 ; CHECK-NEXT: jmp .LBB3_9 entry: diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86 ; RUN: opt < %s -debugify -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=DEBUG -; No change for x86 because 16-bit registers are part of the architecture. +; X86 prefers i32 over i16 for address calculation. 
define i32 @widen_switch_i16(i32 %a) { entry: @@ -28,9 +28,10 @@ ; X86-LABEL: @widen_switch_i16( ; X86: %trunc = trunc i32 %a to i16 -; X86-NEXT: switch i16 %trunc, label %sw.default [ -; X86-NEXT: i16 1, label %sw.bb0 -; X86-NEXT: i16 -1, label %sw.bb1 +; X86-NEXT: %0 = zext i16 %trunc to i32 +; X86-NEXT: switch i32 %0, label %sw.default [ +; X86-NEXT: i32 1, label %sw.bb0 +; X86-NEXT: i32 65535, label %sw.bb1 } ; Widen to 32-bit from a smaller, non-native type. @@ -95,9 +96,9 @@ ret i32 %retval ; X86-LABEL: @widen_switch_i16_sext( -; X86: %0 = sext i2 %a to i8 -; X86-NEXT: switch i8 %0, label %sw.default [ -; X86-NEXT: i8 1, label %sw.bb0 -; X86-NEXT: i8 -1, label %sw.bb1 +; X86: %0 = sext i2 %a to i32 +; X86-NEXT: switch i32 %0, label %sw.default [ +; X86-NEXT: i32 1, label %sw.bb0 +; X86-NEXT: i32 -1, label %sw.bb1 }