Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -250,10 +250,11 @@
 
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isPseudo = 1 in
+    isPseudo = 1, AddedComplexity = 20 in
 def MOV32r0  : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                  [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
+
 // Other widths can also make use of the 32-bit xor, which may have a smaller
 // encoding and avoid partial register updates.
 def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
@@ -262,6 +263,15 @@
   let AddedComplexity = 20;
 }
 
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+// AddedComplexity is selected to be higher than MOV64ri but lower than MOV32r0.
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+                       [(set GR32:$dst, i32immSExt8:$src)]>, Requires<[OptForSize]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+                       [(set GR64:$dst, i64immSExt8:$src)]>, Requires<[OptForSize]>;
+// XXX: Is leaving out the instruction itinerary class and Schedule OK?
+}
+
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
 // that would make it more difficult to rematerialize.
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -5232,6 +5232,30 @@
   return true;
 }
 
+static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
+                               const TargetInstrInfo &TII) {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  DebugLoc DL = MIB->getDebugLoc();
+  int64_t Imm = MIB->getOperand(1).getImm();
+  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+  MachineBasicBlock::iterator I = MIB.getInstr();
+
+  switch (MIB->getOpcode()) {
+  case X86::MOV32ImmSExti8:
+    BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
+    MIB->setDesc(TII.get(X86::POP32r));
+    break;
+  case X86::MOV64ImmSExti8:
+    BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
+    MIB->setDesc(TII.get(X86::POP64r));
+    break;
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  }
+
+  return true;
+}
+
 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
 // code sequence is needed for other targets.
 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5260,6 +5284,9 @@
   switch (MI->getOpcode()) {
   case X86::MOV32r0:
     return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+  case X86::MOV32ImmSExti8:
+  case X86::MOV64ImmSExti8:
+    return ExpandMOVImmSExti8(MIB, *this);
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
Index: test/CodeGen/X86/mov-32imm-sext-i8.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/mov-32imm-sext-i8.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define i32 @test_32_nooptsize() {
+entry:
+  ret i32 -1
+
+; Check that we use regular MOV when not optimizing for size.
+; CHECK-LABEL: test_32_nooptsize:
+; CHECK: movl $-1, %eax
+; CHECK: retq
+}
+
+define i32 @test_32() optsize {
+entry:
+  ret i32 -1
+
+; When optimizing for size, use PUSH/POP for 8-bit immediates,
+; as it encodes 2 bytes smaller than MOV.
+; CHECK-LABEL: test_32:
+; CHECK: pushl $-1
+; CHECK: popl %eax
+; CHECK: retq
+}
+
+define i32 @test_32_not() optsize {
+entry:
+  ret i32 128
+
+; While 128 does fit in 8 bits, we can't use PUSH/POP because that
+; would sign-extend it to a different value.
+; CHECK-LABEL: test_32_not:
+; CHECK: movl $128, %eax
+; CHECK: retq
+}
+
+define i64 @test_64() optsize {
+entry:
+  ret i64 127
+
+; PUSH/POP is used on 64-bit too.
+; CHECK-LABEL: test_64:
+; CHECK: pushq $127
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define i64 @test_64_nooptsize() {
+entry:
+  ret i64 127
+
+; PUSH/POP only used when optimizing for size.
+; CHECK-LABEL: test_64_nooptsize:
+; CHECK: movl $127, %eax
+; CHECK: retq
+}
+
+define i64 @test_minsize() minsize {
+entry:
+  ret i64 5
+
+; Minsize implies optsize.
+; CHECK-LABEL: test_minsize:
+; CHECK: pushq $5
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define zeroext i8 @test_zero() minsize {
+entry:
+  ret i8 0
+
+; In this function we'd select push/pop instead of xor unless we make sure the
+; latter has higher AddedComplexity.
+; CHECK-LABEL: test_zero:
+; CHECK: xorl %eax, %eax
+; CHECK: retq
+}
Index: test/CodeGen/X86/movtopush.ll
===================================================================
--- test/CodeGen/X86/movtopush.ll
+++ test/CodeGen/X86/movtopush.ll
@@ -114,7 +114,8 @@
 
 ; We support weird calling conventions
 ; NORMAL-LABEL: test4:
-; NORMAL: movl $2, %eax
+; NORMAL: pushl $2
+; NORMAL: popl %eax
 ; NORMAL-NEXT: pushl $4
 ; NORMAL-NEXT: pushl $3
 ; NORMAL-NEXT: pushl $1
Index: test/CodeGen/X86/powi.ll
===================================================================
--- test/CodeGen/X86/powi.ll
+++ test/CodeGen/X86/powi.ll
@@ -20,18 +20,18 @@
 define double @pow_wrapper_optsize(double %a) optsize {
 ; CHECK-LABEL: pow_wrapper_optsize:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movl $15, %edi
+; CHECK-NEXT: movl $128, %edi
 ; CHECK-NEXT: jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }
 
 define double @pow_wrapper_minsize(double %a) minsize {
 ; CHECK-LABEL: pow_wrapper_minsize:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movl $15, %edi
+; CHECK-NEXT: movl $128, %edi
 ; CHECK-NEXT: jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }