Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -262,6 +262,22 @@
   let AddedComplexity = 20;
 }
 
+let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
+    AddedComplexity = 1 in {
+  // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
+  // which only require 3 bytes compared to MOV32ri which requires 5.
+  let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
+    def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+                    [(set GR32:$dst, 1)]>;
+    def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+                     [(set GR32:$dst, -1)]>;
+  }
+
+  // MOV16ri is 4 bytes, so the instructions above are smaller.
+  def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
+  def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
+}
+
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
 // that would make it more difficult to rematerialize.
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -2472,10 +2472,20 @@
                                  unsigned DestReg, unsigned SubIdx,
                                  const MachineInstr *Orig,
                                  const TargetRegisterInfo &TRI) const {
-  // MOV32r0 is implemented with a xor which clobbers condition code.
-  // Re-materialize it as movri instructions to avoid side effects.
   unsigned Opc = Orig->getOpcode();
-  if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
+
+  if ((Opc == X86::MOV32r0 || Opc == X86::MOV32r1 || Opc == X86::MOV32r_1) &&
+      !isSafeToClobberEFLAGS(MBB, I)) {
+    // These instructions clobber EFLAGS. Re-materialize as MOV32ri with the
+    // matching immediate to avoid side effects.
+    int Value;
+    switch (Opc) {
+    case X86::MOV32r0:  Value = 0;  break;
+    case X86::MOV32r1:  Value = 1;  break;
+    case X86::MOV32r_1: Value = -1; break;
+    default: llvm_unreachable("Unexpected opcode");
+    }
+
     DebugLoc DL = Orig->getDebugLoc();
     BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
-        .addImm(0);
+        .addImm(Value);
@@ -5267,6 +5277,24 @@
   return true;
 }
 
+static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
+                          bool MinusOne) {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  DebugLoc DL = MIB->getDebugLoc();
+  unsigned Reg = MIB->getOperand(0).getReg();
+
+  // Insert the XOR.
+  BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
+      .addReg(Reg, RegState::Undef)
+      .addReg(Reg, RegState::Undef);
+
+  // Turn the pseudo into an INC or DEC.
+  MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
+  MIB.addReg(Reg);
+
+  return true;
+}
+
 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
 // code sequence is needed for other targets.
 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5295,6 +5323,10 @@
   switch (MI->getOpcode()) {
   case X86::MOV32r0:
     return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+  case X86::MOV32r1:
+    return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
+  case X86::MOV32r_1:
+    return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
Index: test/CodeGen/X86/materialize-one.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/materialize-one.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mtriple=i686-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK64
+
+define i32 @one32() optsize {
+entry:
+  ret i32 1
+
+; CHECK32-LABEL: one32
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: ret
+
+; FIXME: Figure out the best approach in 64-bit mode.
+; CHECK64-LABEL: one32
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @minus_one32() optsize {
+entry:
+  ret i32 -1
+
+; CHECK32-LABEL: minus_one32
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: ret
+}
+
+define i16 @one16() optsize {
+entry:
+  ret i16 1
+
+; CHECK32-LABEL: one16
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @minus_one16() optsize {
+entry:
+  ret i16 -1
+
+; CHECK32-LABEL: minus_one16
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @test_rematerialization() optsize {
+entry:
+  ; Materialize -1 (thiscall forces it into %ecx).
+  tail call x86_thiscallcc void @f(i32 -1)
+
+  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+  ; spilling it to the stack.
+  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+  ; -1 should be re-materialized here instead of getting spilled above.
+  ret i32 -1
+
+; CHECK32-LABEL: test_rematerialization
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NOT: %eax
+; CHECK32: retl
+}
+
+declare x86_thiscallcc void @f(i32)
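
For reference, the byte counts cited in the X86InstrCompiler.td comments break
down as follows (a sketch for reviewers, not part of the patch; encodings
assume 32-bit mode):

    31 c0            xorl %eax, %eax   # 2 bytes
    40               incl %eax         # 1 byte  -> 3 bytes to materialize 1
    48               decl %eax         # 1 byte  -> 3 bytes to materialize -1
    b8 01 00 00 00   movl $1, %eax     # 5 bytes (MOV32ri)
    66 b8 01 00      movw $1, %ax      # 4 bytes (MOV16ri)

In 64-bit mode the bytes 0x40-0x4f act as REX prefixes rather than one-byte
inc/dec encodings, which is why the patterns are guarded by Not64BitMode and
the test carries a FIXME for the 64-bit case.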