Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -250,7 +250,7 @@
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isPseudo = 1 in
+    isPseudo = 1, AddedComplexity = 20 in
 def MOV32r0  : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                  [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
 
@@ -262,6 +262,15 @@
   let AddedComplexity = 20;
 }
 
+// AddedComplexity is chosen above MOV64ri (default 0) but below MOV32r0 (20).
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+                       [(set GR32:$dst, i32immSExt8:$src)]>, Requires<[OptForSize]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+                       [(set GR64:$dst, i64immSExt8:$src)]>, Requires<[OptForSize]>;
+// XXX: Is leaving out the instruction itinerary class and Schedule OK?
+}
+
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
 // that would make it more difficult to rematerialize.
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -23,6 +23,7 @@
 #include "X86GenInstrInfo.inc"
 
 namespace llvm {
+  class MachineInstrBuilder;
   class X86RegisterInfo;
   class X86Subtarget;
 
@@ -564,6 +565,9 @@
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
                       int &FrameIndex) const;
+
+  /// Expand the MOVImmSExti8 pseudo-instructions.
+  bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
 };
 
 } // End llvm namespace
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -5230,6 +5231,46 @@
   return true;
 }
 
+bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  DebugLoc DL = MIB->getDebugLoc();
+  int64_t Imm = MIB->getOperand(1).getImm();
+  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+  MachineBasicBlock::iterator I = MIB.getInstr();
+  int StackAdjustment;
+
+  if (Subtarget.is64Bit()) {
+    assert(MIB->getOpcode() == X86::MOV32ImmSExti8 ||
+           MIB->getOpcode() == X86::MOV64ImmSExti8);
+    // 64-bit mode has no 32-bit push/pop, so use the 64-bit operations and
+    // widen the destination register if necessary.
+    // XXX: The push writes below the incoming stack pointer; do we need to
+    // avoid this expansion in functions that use the red zone?
+    StackAdjustment = 8;
+    BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
+    MIB->setDesc(get(X86::POP64r));
+    MIB->getOperand(0)
+        .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+  } else {
+    assert(MIB->getOpcode() == X86::MOV32ImmSExti8 && "Unexpected opcode!");
+    StackAdjustment = 4;
+    BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
+    MIB->setDesc(get(X86::POP32r));
+  }
+
+  // Build CFI if necessary.
+  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
+  MachineFunction &MF = *MBB.getParent();
+  if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo()) {
+    TFL->BuildCFI(
+        MBB, I, DL,
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
+    TFL->BuildCFI(
+        MBB, std::next(I), DL,
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
+  }
+
+  return true;
+}
+
 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
 // code sequence is needed for other targets.
 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5258,6 +5299,9 @@
   switch (MI->getOpcode()) {
   case X86::MOV32r0:
     return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+  case X86::MOV32ImmSExti8:
+  case X86::MOV64ImmSExti8:
+    return ExpandMOVImmSExti8(MIB);
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
Index: test/CodeGen/X86/mov-32imm-sext-i8.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/mov-32imm-sext-i8.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define i32 @test_32_nooptsize() {
+entry:
+  ret i32 -1
+
+; Check that a regular MOV is used when not optimizing for size.
+; CHECK-LABEL: test_32_nooptsize:
+; CHECK: movl $-1, %eax
+; CHECK: retq
+}
+
+define i32 @test_32() optsize {
+entry:
+  ret i32 -1
+
+; When optimizing for size, use PUSH/POP for immediates that fit in a
+; sign-extended 8 bits: the pair encodes 2 bytes smaller than MOV (see the
+; encoding note after the patch). 64-bit mode has no 32-bit push/pop, so
+; the 64-bit forms are used even for an i32 value.
+; CHECK-LABEL: test_32:
+; CHECK: pushq $-1
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define i32 @test_32_not() optsize {
+entry:
+  ret i32 128
+
+; While 128 fits in 8 bits unsigned, PUSH would sign-extend it to a
+; different value, so a regular MOV must be used.
+; CHECK-LABEL: test_32_not:
+; CHECK: movl $128, %eax
+; CHECK: retq
+}
+
+define i64 @test_64() optsize {
+entry:
+  ret i64 127
+
+; PUSH/POP is used for i64 values too.
+; CHECK-LABEL: test_64:
+; CHECK: pushq $127
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define i64 @test_64_nooptsize() {
+entry:
+  ret i64 127
+
+; PUSH/POP is only used when optimizing for size.
+; CHECK-LABEL: test_64_nooptsize:
+; CHECK: movl $127, %eax
+; CHECK: retq
+}
+
+define i64 @test_minsize() minsize {
+entry:
+  ret i64 5
+
+; Minsize implies optsize.
+; CHECK-LABEL: test_minsize:
+; CHECK: pushq $5
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define zeroext i8 @test_zero() minsize {
+entry:
+  ret i8 0
+
+; Zero must still be materialized with XOR rather than PUSH/POP; this relies
+; on MOV32r0 having higher AddedComplexity than the new pseudos.
+; CHECK-LABEL: test_zero:
+; CHECK: xorl %eax, %eax
+; CHECK: retq
+}
+
+define i32 @test_cfi() optsize !dbg !4 {
+entry:
+  ret i32 42
+
+; When precise unwind info is required, CFI adjustments must bracket the
+; PUSH/POP pair (see the unwind note after the patch).
+; CHECK-LABEL: test_cfi:
+; CHECK: pushq $42
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: popq %rax
+; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: retq
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1)
+!1 = !DIFile(filename: "a.c", directory: "/")
+!4 = distinct !DISubprogram()
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
Index: test/CodeGen/X86/movtopush.ll
===================================================================
--- test/CodeGen/X86/movtopush.ll
+++ test/CodeGen/X86/movtopush.ll
@@ -114,7 +114,8 @@
 
 ; We support weird calling conventions
 ; NORMAL-LABEL: test4:
-; NORMAL: movl $2, %eax
+; NORMAL: pushl $2
+; NORMAL: popl %eax
 ; NORMAL-NEXT: pushl $4
 ; NORMAL-NEXT: pushl $3
 ; NORMAL-NEXT: pushl $1
Index: test/CodeGen/X86/powi.ll
===================================================================
--- test/CodeGen/X86/powi.ll
+++ test/CodeGen/X86/powi.ll
@@ -20,18 +20,18 @@
 define double @pow_wrapper_optsize(double %a) optsize {
 ; CHECK-LABEL: pow_wrapper_optsize:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl $15, %edi
+; CHECK-NEXT:    movl $128, %edi
 ; CHECK-NEXT:    jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }
 
 define double @pow_wrapper_minsize(double %a) minsize {
 ; CHECK-LABEL: pow_wrapper_minsize:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl $15, %edi
+; CHECK-NEXT:    movl $128, %edi
 ; CHECK-NEXT:    jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }
 
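
For reference, the size win being chased here, with instruction encodings per
the Intel SDM (an illustrative sketch, not part of the patch itself):

  # 32-bit mode, materializing -1:
  b8 ff ff ff ff          movl  $-1, %eax   # MOV r32, imm32: 5 bytes
  6a ff                   pushl $-1         # PUSH imm8: 2 bytes
  58                      popl  %eax        # POP r32: 1 byte (3 total, saves 2)

  # 64-bit mode, materializing -1:
  48 c7 c0 ff ff ff ff    movq  $-1, %rax   # MOV r64, imm32: 7 bytes
  6a ff                   pushq $-1         # PUSH imm8: 2 bytes
  58                      popq  %rax        # POP r64: 1 byte (3 total, saves 4)

The trade-off is speed: the pair costs an extra instruction and two memory
accesses, which is why the pseudos are gated on optsize/minsize.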
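Why the CFI adjustments in test_cfi matter (illustrative, assuming a frameless
function whose CFA is defined relative to %rsp):

  pushq $42                   # %rsp moves down by 8 bytes...
  .cfi_adjust_cfa_offset 8    # ...so the recorded CFA offset must grow by 8
  popq  %rax                  # %rsp moves back up...
  .cfi_adjust_cfa_offset -8   # ...and the offset shrinks again

Without the bracketing directives, an unwinder that interrupts execution
between the PUSH and the POP (e.g. for a profiler sample or an async signal)
would compute the CFA, and hence every saved register, 8 bytes off.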