Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -250,10 +250,11 @@
 
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isPseudo = 1 in
+    isPseudo = 1, AddedComplexity = 20 in
 def MOV32r0  : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                  [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
+
 // Other widths can also make use of the 32-bit xor, which may have a smaller
 // encoding and avoid partial register updates.
 def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
@@ -262,6 +263,15 @@
   let AddedComplexity = 20;
 }
 
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+// AddedComplexity is selected to be higher than MOV64ri but lower than MOV32r0.
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+                       [(set GR32:$dst, i32immSExt8:$src)]>, Requires<[OptForSize]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+                       [(set GR64:$dst, i64immSExt8:$src)]>, Requires<[OptForSize]>;
+// XXX: Is leaving out the instruction itinerary class and Schedule OK?
+}
+
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
 // that would make it more difficult to rematerialize.
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -5232,6 +5232,30 @@
   return true;
 }
 
+static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
+                               const TargetInstrInfo &TII) {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  DebugLoc DL = MIB->getDebugLoc();
+  int64_t Imm = MIB->getOperand(1).getImm();
+  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+  MachineBasicBlock::iterator I = MIB.getInstr();
+
+  switch (MIB->getOpcode()) {
+  case X86::MOV32ImmSExti8:
+    BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
+    MIB->setDesc(TII.get(X86::POP32r));
+    break;
+  case X86::MOV64ImmSExti8:
+    BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
+    MIB->setDesc(TII.get(X86::POP64r));
+    break;
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  }
+
+  return true;
+}
+
 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
 // code sequence is needed for other targets.
 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5260,6 +5284,9 @@
   switch (MI->getOpcode()) {
   case X86::MOV32r0:
     return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+  case X86::MOV32ImmSExti8:
+  case X86::MOV64ImmSExti8:
+    return ExpandMOVImmSExti8(MIB, *this);
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
Index: test/CodeGen/X86/mov-32imm-sext-i8.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/mov-32imm-sext-i8.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define i32 @test_32_nooptsize() {
+entry:
+  ret i32 -1
+
+; Check that we use regular MOV when not optimizing for size.
+; CHECK-LABEL: test_32_nooptsize:
+; CHECK: movl $-1, %eax
+; CHECK: retq
+}
+
+define i32 @test_32() optsize {
+entry:
+  ret i32 -1
+
+; When optimizing for size, use PUSH/POP for 8-bit immediates,
+; as it encodes 2 bytes smaller than MOV.
+; CHECK-LABEL: test_32:
+; CHECK: pushl $-1
+; CHECK: popl %eax
+; CHECK: retq
+}
+
+define i32 @test_32_not() optsize {
+entry:
+  ret i32 128
+
+; While 128 does fit in 8 bits, we can't use PUSH/POP because that
+; would sign-extend it to a different value.
+; CHECK-LABEL: test_32_not:
+; CHECK: movl $128, %eax
+; CHECK: retq
+}
+
+define i64 @test_64() optsize {
+entry:
+  ret i64 127
+
+; PUSH/POP is used on 64-bit too.
+; CHECK-LABEL: test_64:
+; CHECK: pushq $127
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define i64 @test_64_nooptsize() {
+entry:
+  ret i64 127
+
+; PUSH/POP only used when optimizing for size.
+; CHECK-LABEL: test_64_nooptsize:
+; CHECK: movl $127, %eax
+; CHECK: retq
+}
+
+define i64 @test_minsize() minsize {
+entry:
+  ret i64 5
+
+; Minsize implies optsize.
+; CHECK-LABEL: test_minsize:
+; CHECK: pushq $5
+; CHECK: popq %rax
+; CHECK: retq
+}
+
+define zeroext i8 @test_zero() minsize {
+entry:
+  ret i8 0
+
+; In this function we'd select push/pop instead of xor unless we make sure the
+; latter has higher AddedComplexity.
+; CHECK-LABEL: test_zero:
+; CHECK: xorl %eax, %eax
+; CHECK: retq
+}
Index: test/CodeGen/X86/movtopush.ll
===================================================================
--- test/CodeGen/X86/movtopush.ll
+++ test/CodeGen/X86/movtopush.ll
@@ -114,7 +114,8 @@
 
 ; We support weird calling conventions
 ; NORMAL-LABEL: test4:
-; NORMAL: movl $2, %eax
+; NORMAL: pushl $2
+; NORMAL: popl %eax
 ; NORMAL-NEXT: pushl $4
 ; NORMAL-NEXT: pushl $3
 ; NORMAL-NEXT: pushl $1
Index: test/CodeGen/X86/powi.ll
===================================================================
--- test/CodeGen/X86/powi.ll
+++ test/CodeGen/X86/powi.ll
@@ -20,18 +20,18 @@
 define double @pow_wrapper_optsize(double %a) optsize {
 ; CHECK-LABEL: pow_wrapper_optsize:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movl $15, %edi
+; CHECK-NEXT: movl $128, %edi
 ; CHECK-NEXT: jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }
 
 define double @pow_wrapper_minsize(double %a) minsize {
 ; CHECK-LABEL: pow_wrapper_minsize:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movl $15, %edi
+; CHECK-NEXT: movl $128, %edi
 ; CHECK-NEXT: jmp
-  %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; [#uses=1]
+  %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; [#uses=1]
   ret double %ret
 }