Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -250,7 +250,7 @@
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isPseudo = 1 in
+    isPseudo = 1, AddedComplexity = 20 in
 def MOV32r0  : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                  [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
 
@@ -262,6 +262,15 @@
   let AddedComplexity = 20;
 }
 
+// AddedComplexity is chosen above MOV64ri (default 0) but below MOV32r0 (20).
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+                       [(set GR32:$dst, i32immSExt8:$src)]>, Requires<[OptForSize]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+                       [(set GR64:$dst, i64immSExt8:$src)]>, Requires<[OptForSize]>;
+// XXX: Is leaving out the instruction itinerary class and Schedule OK?
+}
+
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
 // that would make it more difficult to rematerialize.
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -23,6 +23,7 @@
 #include "X86GenInstrInfo.inc"
 
 namespace llvm {
+  class MachineInstrBuilder;
   class X86RegisterInfo;
   class X86Subtarget;
 
@@ -564,6 +565,9 @@
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
                       int &FrameIndex) const;
+
+  /// Expand the MOVImmSExti8 pseudo-instructions.
+  bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
 };
 
 } // End llvm namespace
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -5230,6 +5231,46 @@
   return true;
 }
 
+bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  DebugLoc DL = MIB->getDebugLoc();
+  int64_t Imm = MIB->getOperand(1).getImm();
+  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+  MachineBasicBlock::iterator I = MIB.getInstr();
+  int StackAdjustment;
+
+  if (Subtarget.is64Bit()) {
+    assert(MIB->getOpcode() == X86::MOV32ImmSExti8 ||
+           MIB->getOpcode() == X86::MOV64ImmSExti8);
+    // 64-bit mode has no 32-bit push/pop, so use the 64-bit operations and
+    // widen the destination register if necessary.
+    // XXX: The push writes below the incoming stack pointer; do we need to
+    // avoid this expansion in functions that use the red zone?
+    StackAdjustment = 8;
+    BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
+    MIB->setDesc(get(X86::POP64r));
+    MIB->getOperand(0)
+        .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+  } else {
+    assert(MIB->getOpcode() == X86::MOV32ImmSExti8 && "Unexpected opcode!");
+    StackAdjustment = 4;
+    BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
+    MIB->setDesc(get(X86::POP32r));
+  }
+
+  // Build CFI if necessary.
+  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
+  MachineFunction &MF = *MBB.getParent();
+  if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo()) {
+    TFL->BuildCFI(
+        MBB, I, DL,
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
+    TFL->BuildCFI(
+        MBB, std::next(I), DL,
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
+  }
+
+  return true;
+}
+
 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
 // code sequence is needed for other targets.
 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5258,6 +5299,9 @@
   switch (MI->getOpcode()) {
   case X86::MOV32r0:
     return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+  case X86::MOV32ImmSExti8:
+  case X86::MOV64ImmSExti8:
+    return ExpandMOVImmSExti8(MIB);
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
Index: test/CodeGen/X86/mov-32imm-sext-i8.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/mov-32imm-sext-i8.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define i32 @test_32_nooptsize() {
+entry:
+  ret i32 -1
+
+; Check that a regular MOV is used when not optimizing for size.
+; CHECK-LABEL: test_32_nooptsize:
+; CHECK: movl $-1, %eax
+; CHECK: retq
+}
+
+define i32 @test_32() optsize {
+entry:
+  ret i32 -1
+
+; When optimizing for size, use PUSH/POP for immediates that fit in a
+; sign-extended 8 bits: the pair encodes 2 bytes smaller than MOV (see the
+; encoding note after the patch). 64-bit mode has no 32-bit push/pop, so
+; the 64-bit forms are used even for an i32 value.
+; CHECK-LABEL: test_32:
+; CHECK: pushq $-1
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define i32 @test_32_not() optsize {
+entry:
+  ret i32 128
+
+; While 128 fits in 8 bits unsigned, PUSH would sign-extend it to a
+; different value, so a regular MOV must be used.
+; CHECK-LABEL: test_32_not:
+; CHECK: movl $128, %eax
+; CHECK: retq
+}
+
+define i64 @test_64() optsize {
+entry:
+  ret i64 127
+
+; PUSH/POP is used for i64 values too.
+; CHECK-LABEL: test_64:
+; CHECK: pushq $127
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define i64 @test_64_nooptsize() {
+entry:
+  ret i64 127
+
+; PUSH/POP is only used when optimizing for size.
+; CHECK-LABEL: test_64_nooptsize:
+; CHECK: movl $127, %eax
+; CHECK: retq
+}
+
+define i64 @test_minsize() minsize {
+entry:
+  ret i64 5
+
+; Minsize implies optsize.
+; CHECK-LABEL: test_minsize:
+; CHECK: pushq $5
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define zeroext i8 @test_zero() minsize {
+entry:
+  ret i8 0
+
+; Zero must still be materialized with XOR rather than PUSH/POP; this relies
+; on MOV32r0 having higher AddedComplexity than the new pseudos.
+; CHECK-LABEL: test_zero:
+; CHECK: xorl %eax, %eax
+; CHECK: retq
+}
+
+define i32 @test_cfi() optsize !dbg !4 {
+entry:
+  ret i32 42
+
+; When precise unwind info is required, CFI adjustments must bracket the
+; PUSH/POP pair (see the unwind note after the patch).
+; CHECK-LABEL: test_cfi:
+; CHECK: pushq $42
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: popq %rax
+; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: retq
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1)
+!1 = !DIFile(filename: "a.c", directory: "/")
+!4 = distinct !DISubprogram()
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
Index: test/CodeGen/X86/movtopush.ll
===================================================================
--- test/CodeGen/X86/movtopush.ll
+++ test/CodeGen/X86/movtopush.ll
@@ -114,7 +114,8 @@
 
 ; We support weird calling conventions
 ; NORMAL-LABEL: test4:
-; NORMAL: movl $2, %eax
+; NORMAL: pushl $2
+; NORMAL: popl %eax
 ; NORMAL-NEXT: pushl $4
 ; NORMAL-NEXT: pushl $3
 ; NORMAL-NEXT: pushl $1
Index: test/CodeGen/X86/powi.ll
===================================================================
--- test/CodeGen/X86/powi.ll
+++ test/CodeGen/X86/powi.ll
@@ -20,18 +20,18 @@
 define double @pow_wrapper_optsize(double %a) optsize {
 ; CHECK-LABEL: pow_wrapper_optsize:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl $15, %edi
+; CHECK-NEXT:    movl $128, %edi
 ; CHECK-NEXT:    jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }
 
 define double @pow_wrapper_minsize(double %a) minsize {
 ; CHECK-LABEL: pow_wrapper_minsize:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl $15, %edi
+; CHECK-NEXT:    movl $128, %edi
 ; CHECK-NEXT:    jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }
 
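
For reference, the size win being chased here, with instruction encodings per
the Intel SDM (an illustrative sketch, not part of the patch itself):

  # 32-bit mode, materializing -1:
  b8 ff ff ff ff          movl  $-1, %eax   # MOV r32, imm32: 5 bytes
  6a ff                   pushl $-1         # PUSH imm8: 2 bytes
  58                      popl  %eax        # POP r32: 1 byte (3 total, saves 2)

  # 64-bit mode, materializing -1:
  48 c7 c0 ff ff ff ff    movq  $-1, %rax   # MOV r64, imm32: 7 bytes
  6a ff                   pushq $-1         # PUSH imm8: 2 bytes
  58                      popq  %rax        # POP r64: 1 byte (3 total, saves 4)

The trade-off is speed: the pair costs an extra instruction and two memory
accesses, which is why the pseudos are gated on optsize/minsize.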
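Why the CFI adjustments in test_cfi matter (illustrative, assuming a frameless
function whose CFA is defined relative to %rsp):

  pushq $42                   # %rsp moves down by 8 bytes...
  .cfi_adjust_cfa_offset 8    # ...so the recorded CFA offset must grow by 8
  popq  %rax                  # %rsp moves back up...
  .cfi_adjust_cfa_offset -8   # ...and the offset shrinks again

Without the bracketing directives, an unwinder that interrupts execution
between the PUSH and the POP (e.g. for a profiler sample or an async signal)
would compute the CFA, and hence every saved register, 8 bytes off.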