diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -330,5 +330,15 @@
   return true;
 }
 
+static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                              CCValAssign::LocInfo &LocInfo,
+                              ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  if (LocVT == MVT::i64) // Already 64-bit: leave LocVT and LocInfo untouched.
+    return false;
+  LocVT = MVT::i64;            // Narrower pointer: widen the location to i64
+  LocInfo = CCValAssign::ZExt; // and record that it is zero-extended.
+  return false; // Only LocVT/LocInfo are adjusted; State is never touched.
+}
+
 // Provides entry points of CC_X86 and RetCC_X86.
 #include "X86GenCallingConv.inc"
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -336,6 +336,9 @@
   // MMX vector types are always returned in XMM0.
   CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
 
+  // Pointers are always returned in full 64-bit registers.
+  CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
+
   CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
 
   CCDelegateTo<RetCC_X86Common>
@@ -518,6 +521,9 @@
   CCIfCC<"CallingConv::Swift",
     CCIfSRet<CCIfType<[i64], CCAssignToReg<[RAX]>>>>,
 
+  // Pointers are always passed in full 64-bit registers.
+  CCIfPtr<CCCustom<"CC_X86_64_Pointer">>,
+
   // The first 6 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
   CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3063,8 +3063,9 @@
                         // This truncation won't change the value.
                         DAG.getIntPtrConstant(1, dl));
 
-    if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
+    if (VA.isExtInLoc()) {
       if (VA.getValVT().isVector() &&
+          VA.getValVT().getScalarType() == MVT::i1 &&
           ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
            (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
         // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -136,7 +136,7 @@
 ; LINUX-X32-NEXT:    movq %rcx, %r13
 ; LINUX-X32-NEXT:    movq %rdx, %rbp
 ; LINUX-X32-NEXT:    movq %rsi, %rbx
-; LINUX-X32-NEXT:    movl %edi, %r14d
+; LINUX-X32-NEXT:    movq %rdi, %r14
 ; LINUX-X32-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; LINUX-X32-NEXT:    testb %al, %al
 ; LINUX-X32-NEXT:    je .LBB0_2
@@ -161,7 +161,7 @@
 ; LINUX-X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; LINUX-X32-NEXT:    movabsq $206158430216, %rax # imm = 0x3000000008
 ; LINUX-X32-NEXT:    movq %rax, {{[0-9]+}}(%esp)
-; LINUX-X32-NEXT:    movl %r14d, %edi
+; LINUX-X32-NEXT:    movq %r14, %rdi
 ; LINUX-X32-NEXT:    movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT:    movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT:    movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -172,7 +172,7 @@
 ; LINUX-X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT:    callq get_f
 ; LINUX-X32-NEXT:    movl %eax, %r11d
-; LINUX-X32-NEXT:    movl %r14d, %edi
+; LINUX-X32-NEXT:    movq %r14, %rdi
 ; LINUX-X32-NEXT:    movq %rbx, %rsi
 ; LINUX-X32-NEXT:    movq %rbp, %rdx
 ; LINUX-X32-NEXT:    movq %r13, %rcx
@@ -306,8 +306,7 @@
 ;
 ; LINUX-X32-LABEL: g_thunk:
 ; LINUX-X32:       # %bb.0:
-; LINUX-X32-NEXT:    movl %edi, %r11d
-; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
+; LINUX-X32-NEXT:    jmpq *%rdi # TAILCALL
 ;
 ; WINDOWS-LABEL: g_thunk:
 ; WINDOWS:       # %bb.0:
@@ -348,10 +347,12 @@
 ; LINUX-X32-NEXT:    jne .LBB2_2
 ; LINUX-X32-NEXT:  # %bb.1: # %then
 ; LINUX-X32-NEXT:    movl 4(%edi), %r11d
+; LINUX-X32-NEXT:    movl %edi, %edi
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ; LINUX-X32-NEXT:  .LBB2_2: # %else
 ; LINUX-X32-NEXT:    movl 8(%edi), %r11d
 ; LINUX-X32-NEXT:    movl $42, {{.*}}(%rip)
+; LINUX-X32-NEXT:    movl %edi, %edi
 ; LINUX-X32-NEXT:    jmpq *%r11 # TAILCALL
 ;
 ; WINDOWS-LABEL: h_thunk:
diff --git a/llvm/test/CodeGen/X86/pr38865-2.ll b/llvm/test/CodeGen/X86/pr38865-2.ll
--- a/llvm/test/CodeGen/X86/pr38865-2.ll
+++ b/llvm/test/CodeGen/X86/pr38865-2.ll
@@ -10,6 +10,7 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT:    callq _Z1bv
diff --git a/llvm/test/CodeGen/X86/pr38865-3.ll b/llvm/test/CodeGen/X86/pr38865-3.ll
--- a/llvm/test/CodeGen/X86/pr38865-3.ll
+++ b/llvm/test/CodeGen/X86/pr38865-3.ll
@@ -10,6 +10,7 @@
 ; CHECK-NEXT:    movl $707406378, %eax # encoding: [0xb8,0x2a,0x2a,0x2a,0x2a]
 ; CHECK-NEXT:    # imm = 0x2A2A2A2A
 ; CHECK-NEXT:    movl $32, %ecx # encoding: [0xb9,0x20,0x00,0x00,0x00]
+; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
 ; CHECK-NEXT:    rep;stosl %eax, %es:(%edi) # encoding: [0xf3,0x67,0xab]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   call void @llvm.memset.p0i8.i32(i8* align 4 %x, i8 42, i32 128, i1 false)
diff --git a/llvm/test/CodeGen/X86/pr38865.ll b/llvm/test/CodeGen/X86/pr38865.ll
--- a/llvm/test/CodeGen/X86/pr38865.ll
+++ b/llvm/test/CodeGen/X86/pr38865.ll
@@ -15,7 +15,7 @@
 ; CHECK-NEXT:    subl $528, %esp # encoding: [0x81,0xec,0x10,0x02,0x00,0x00]
 ; CHECK-NEXT:    # imm = 0x210
 ; CHECK-NEXT:    leal {{[0-9]+}}(%rsp), %ebx # encoding: [0x8d,0x9c,0x24,0x08,0x01,0x00,0x00]
-; CHECK-NEXT:    movl %ebx, %edi # encoding: [0x89,0xdf]
+; CHECK-NEXT:    movq %rbx, %rdi # encoding: [0x48,0x89,0xdf]
 ; CHECK-NEXT:    movl $c, %esi # encoding: [0xbe,A,A,A,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: c, kind: FK_Data_4
 ; CHECK-NEXT:    movl $260, %edx # encoding: [0xba,0x04,0x01,0x00,0x00]
diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll
--- a/llvm/test/CodeGen/X86/sibcall.ll
+++ b/llvm/test/CodeGen/X86/sibcall.ll
@@ -74,16 +74,13 @@
 ;
 ; X32-LABEL: t4:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movq %rdi, %rax
 ; X32-NEXT:    xorl %edi, %edi
 ; X32-NEXT:    jmpq *%rax # TAILCALL
   tail call void %x(i32 0) nounwind
   ret void
 }
 
-; FIXME: This isn't needed since x32 psABI specifies that callers must
-;        zero-extend pointers passed in registers.
-
 define void @t5(void ()* nocapture %x) nounwind ssp {
 ; X86-LABEL: t5:
 ; X86:       # %bb.0:
@@ -95,8 +92,7 @@
 ;
 ; X32-LABEL: t5:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    jmpq *%rax # TAILCALL
+; X32-NEXT:    jmpq *%rdi # TAILCALL
   tail call void %x() nounwind
   ret void
 }
@@ -227,7 +223,7 @@
 ;
 ; X32-LABEL: t9:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movq %rdi, %rax
 ; X32-NEXT:    xorl %edi, %edi
 ; X32-NEXT:    jmpq *%rax # TAILCALL
 entry:
@@ -400,6 +396,7 @@
 ; X32-NEXT:    pushq %rcx
 ; X32-NEXT:    callq foo7
 ; X32-NEXT:    addl $32, %esp
+; X32-NEXT:    movl %eax, %eax
 ; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
 entry:
@@ -477,7 +474,7 @@
 ; X32-LABEL: t15:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq f
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@@ -651,7 +648,7 @@
 ; X32-LABEL: t21_sret_to_sret:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@@ -689,7 +686,7 @@
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    subl $16, %esp
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    movl %esp, %edi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
@@ -727,7 +724,7 @@
 ; X32-LABEL: t21_sret_to_sret_more_args:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@@ -762,8 +759,8 @@
 ; X32-LABEL: t21_sret_to_sret_second_arg_sret:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %esi, %ebx
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movq %rsi, %rbx
+; X32-NEXT:    movq %rsi, %rdi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@@ -803,7 +800,7 @@
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    callq f_sret
@@ -841,8 +838,8 @@
 ; X32-LABEL: t21_sret_to_sret_args_mismatch:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movq %rdi, %rbx
+; X32-NEXT:    movq %rsi, %rdi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@@ -877,8 +874,8 @@
 ; X32-LABEL: t21_sret_to_sret_args_mismatch2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movq %rdi, %rbx
+; X32-NEXT:    movq %rsi, %rdi
 ; X32-NEXT:    callq t21_f_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
@@ -915,7 +912,7 @@
 ; X32-LABEL: t21_sret_to_sret_arg_mismatch:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq ret_struct
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    callq t21_f_sret
@@ -964,19 +961,19 @@
 ;
 ; X32-LABEL: t21_sret_to_sret_structs_mismatch:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbp
+; X32-NEXT:    pushq %r14
 ; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    pushq %rax
-; X32-NEXT:    movl %esi, %ebx
-; X32-NEXT:    movl %edi, %ebp
+; X32-NEXT:    movq %rsi, %rbx
+; X32-NEXT:    movq %rdi, %r14
 ; X32-NEXT:    callq ret_struct
-; X32-NEXT:    movl %ebx, %edi
 ; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movq %rbx, %rdi
 ; X32-NEXT:    callq t21_f_sret2
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    movl %r14d, %eax
 ; X32-NEXT:    addl $8, %esp
 ; X32-NEXT:    popq %rbx
-; X32-NEXT:    popq %rbp
+; X32-NEXT:    popq %r14
 ; X32-NEXT:    retq
   %b = call fastcc %struct.foo* @ret_struct()
   tail call fastcc void @t21_f_sret2(%struct.foo* noalias sret %a, %struct.foo* noalias %b) nounwind
@@ -1010,7 +1007,7 @@
 ; X32-LABEL: t21_sret_to_non_sret:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    callq t21_f_non_sret
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    popq %rbx
diff --git a/llvm/test/CodeGen/X86/x32-function_pointer-2.ll b/llvm/test/CodeGen/X86/x32-function_pointer-2.ll
--- a/llvm/test/CodeGen/X86/x32-function_pointer-2.ll
+++ b/llvm/test/CodeGen/X86/x32-function_pointer-2.ll
@@ -14,8 +14,8 @@
 entry:
   tail call void %foo(i8* %h) nounwind
 ; CHECK: mov{{l|q}}	%{{e|r}}si, %{{e|r}}[[REG:.*]]{{d?}}
-; CHECK: callq	*%r[[REG]]
+; CHECK: callq	*%r
   tail call void %foo(i8* %h) nounwind
-; CHECK: jmpq	*%r{{[^,]*}}
+; CHECK: jmpq	*%r
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-sret-return.ll b/llvm/test/CodeGen/X86/x86-64-sret-return.ll
--- a/llvm/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/llvm/test/CodeGen/X86/x86-64-sret-return.ll
@@ -7,9 +7,10 @@
 ; CHECK-LABEL: bar:
 ; CHECK: movq %rdi, %rax
 
-; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
+; For the x32 ABI, pointers are 32-bit but are passed zero-extended in 64-bit
+; registers, so either 32-bit or 64-bit instructions may be used.
 ; X32ABI-LABEL: bar:
-; X32ABI: movl %edi, %eax
+; X32ABI: mov{{l|q}} %{{r|e}}di, %{{r|e}}ax
 
 define void @bar(%struct.foo* noalias sret  %agg.result, %struct.foo* %d) nounwind  {
 entry:
@@ -63,9 +64,10 @@
 ; CHECK-LABEL: foo:
 ; CHECK: movq %rdi, %rax
 
-; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
+; For the x32 ABI, pointers are 32-bit but are passed zero-extended in 64-bit
+; registers, so either 32-bit or 64-bit instructions may be used.
 ; X32ABI-LABEL: foo:
-; X32ABI: movl %edi, %eax
+; X32ABI: mov{{l|q}} %{{r|e}}di, %{{r|e}}ax
 
 define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
   store { i64 } { i64 0 }, { i64 }* %agg.result