diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6831,6 +6831,9 @@
   if (ArgFlags.isNest())
     report_fatal_error("Nest arguments are unimplemented.");
 
+  if (ValVT.isVector() || LocVT.isVector())
+    report_fatal_error("Vector arguments are unimplemented on AIX.");
+
   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
@@ -6875,18 +6878,33 @@
     // This includes f64 in 64-bit mode for ABI compatibility.
     State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
     if (unsigned Reg = State.AllocateReg(FPR))
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::f64, LocInfo));
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
     else
       report_fatal_error("Handling of placing parameters on the stack is "
                          "unimplemented!");
 
-    // f32 reserves 1 GPR in both PPC32 and PPC64.
-    // f64 reserves 2 GPRs in PPC32 and 1 GPR in PPC64.
-    for (unsigned i = 0; i < StoreSize; i += PtrByteSize)
-      State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32);
+    // AIX requires that GPRs are reserved for float arguments.
+    // Successfully reserved GPRs are only initialized for vararg calls.
+    MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+    for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
+      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+        if (State.isVarArg()) {
+          // Custom handling is required for:
+          //   f64 in PPC32 needs to be split into 2 GPRs.
+          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
+          State.addLoc(
+              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+        }
+      } else if (State.isVarArg()) {
+        report_fatal_error("Handling of placing parameters on the stack is "
+                           "unimplemented!");
+      }
+    }
+
     return false;
   }
   }
+  return true;
 }
 
 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
@@ -7013,7 +7031,7 @@
           CallConv == CallingConv::Cold ||
           CallConv == CallingConv::Fast) && "Unexpected calling convention!");
 
-  if (isVarArg || isPatchPoint)
+  if (isPatchPoint)
     report_fatal_error("This call type is unimplemented on AIX.");
 
   const PPCSubtarget& Subtarget =
@@ -7032,7 +7050,8 @@
   //   [SP][CR][LR][2 x reserved][TOC].
   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
-  const unsigned PtrByteSize = Subtarget.isPPC64() ? 8 : 4;
+  const bool IsPPC64 = Subtarget.isPPC64();
+  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
 
@@ -7052,26 +7071,70 @@
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
 
-  for (CCValAssign &VA : ArgLocs) {
+  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
+    CCValAssign &VA = ArgLocs[I++];
+
+    if (VA.isMemLoc())
+      report_fatal_error("Handling of placing parameters on the stack is "
+                         "unimplemented!");
+    if (!VA.isRegLoc())
+      report_fatal_error(
+          "Unexpected non-register location for function call argument.");
+
     SDValue Arg = OutVals[VA.getValNo()];
 
-    switch (VA.getLocInfo()) {
-    default: report_fatal_error("Unexpected argument extension type.");
-    case CCValAssign::Full: break;
-    case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
-      break;
+    if (!VA.needsCustom()) {
+      switch (VA.getLocInfo()) {
+      default:
+        report_fatal_error("Unexpected argument extension type.");
+      case CCValAssign::Full:
+        break;
+      case CCValAssign::ZExt:
+        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+      case CCValAssign::SExt:
+        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+      }
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+
+      continue;
     }
 
-    if (VA.isRegLoc())
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    // Custom handling is used for GPR initializations for vararg float
+    // arguments.
+    assert(isVarArg && VA.getValVT().isFloatingPoint() &&
+           VA.getLocVT().isInteger() &&
+           "Unexpected custom register handling for calling convention.");
 
-    if (VA.isMemLoc())
-      report_fatal_error("Handling of placing parameters on the stack is "
-                         "unimplemented!");
+    SDValue ArgAsInt =
+        DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
+
+    if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+      // f32 in 32-bit GPR
+      // f64 in 64-bit GPR
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
+    else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+      // f32 in 64-bit GPR.
+      RegsToPass.push_back(std::make_pair(
+          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+    else {
+      // f64 in two 32-bit GPRs
+      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
+      assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
+             "Unexpected custom register for argument!");
+      CCValAssign &GPR1 = VA;
+      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
+                                     DAG.getConstant(32, dl, MVT::i8));
+      RegsToPass.push_back(std::make_pair(
+          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
+      assert(I != E && "A second custom GPR is expected!");
+      CCValAssign &GPR2 = ArgLocs[I++];
+      assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
+             GPR2.needsCustom() && "A second custom GPR is expected!");
+      RegsToPass.push_back(std::make_pair(
+          GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+    }
   }
 
   // For indirect calls, we need to save the TOC base to the stack for
diff --git a/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
rename from llvm/test/CodeGen/PowerPC/aix_cc_abi.ll
rename to llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
--- a/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1,9 +1,17 @@
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
 ; RUN: FileCheck --check-prefixes=CHECK,32BIT %s
 
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:  -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s
+
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \
 ; RUN: FileCheck --check-prefixes=CHECK,64BIT %s
 
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
+; RUN:  -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s
+
 define void @call_test_chars() {
 entry:
   call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97)
@@ -148,7 +156,6 @@
   call void @test_i1(i1 1)
   ret void
 }
-
 ; CHECK-LABEL: name: call_test_i1
 
 ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
@@ -251,7 +258,6 @@
   ret void
 }
 
-
 ; CHECK-LABEL: name: call_test_i64
 
 ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
@@ -612,3 +618,263 @@
 ; 64BIT:        body:             |
 ; 64BIT-NEXT:    bb.0.entry:
 ; 64BIT-NEXT:     liveins: $x3, $x4, $x5, $x6, $x7
+
+define void @call_test_vararg() {
+entry:
+  %0 = load float, float* @f1, align 4
+  %conv = fpext float %0 to double
+  %1 = load double, double* @d1, align 8
+  call void (i32, ...) @test_vararg(i32 42, double %conv, double %1)
+  ret void
+}
+
+declare void @test_vararg(i32, ...)
+
+; CHECK-LABEL:     name: call_test_vararg
+
+; 32BIT:      renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
+; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
+; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
+; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
+; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
+; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
+; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
+; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4)
+; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: $r3 = LI 42
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; CHECKASM-LABEL: .call_test_vararg:
+
+; ASM32PWR4:      stwu 1, -80(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
+; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2)
+; ASM32PWR4-NEXT: stfd 1, 64(1)
+; ASM32PWR4-NEXT: lfd 2, 0([[REG]])
+; ASM32PWR4-NEXT: li 3, 42
+; ASM32PWR4-NEXT: stfd 2, 72(1)
+; ASM32PWR4-DAG:  lwz 4, 64(1)
+; ASM32PWR4-DAG:  lwz 5, 68(1)
+; ASM32PWR4-DAG:  lwz 6, 72(1)
+; ASM32PWR4-DAG:  lwz 7, 76(1)
+; ASM32PWR4-NEXT: bl .test_vararg
+; ASM32PWR4-NEXT: nop
+
+; 64BIT:      renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
+; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
+; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1)
+; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]])
+; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
+; 64BIT-NEXT: renamable $x5 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]])
+; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: $x3 = LI8 42
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit $x5, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:      stdu 1, -128(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
+; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2)
+; ASM64PWR4-NEXT: stfd 1, 112(1)
+; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
+; ASM64PWR4-NEXT: li 3, 42
+; ASM64PWR4-NEXT: stfd 2, 120(1)
+; ASM64PWR4-NEXT: ld 4, 112(1)
+; ASM64PWR4-NEXT: ld 5, 120(1)
+; ASM64PWR4-NEXT: bl .test_vararg
+; ASM64PWR4-NEXT: nop
+
+define void @call_test_vararg2() {
+entry:
+  %0 = load float, float* @f1, align 4
+  %conv = fpext float %0 to double
+  %1 = load double, double* @d1, align 8
+  call void (i32, ...) @test_vararg(i32 42, double %conv, i32 42, double %1)
+  ret void
+}
+
+; CHECK-LABEL:     name: call_test_vararg2
+
+; 32BIT:      renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
+; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
+; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
+; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
+; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
+; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
+; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
+; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4)
+; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: $r3 = LI 42
+; 32BIT-NEXT: $r6 = LI 42
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit $f2, implicit $r7, implicit $r8, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; ASM32PWR4:      stwu 1, -80(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
+; ASM32PWR4-NEXT: li 6, 42
+; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2)
+; ASM32PWR4-NEXT: stfd 1, 64(1)
+; ASM32PWR4-NEXT: lfd 2, 0([[REG]])
+; ASM32PWR4-NEXT: li 3, 42
+; ASM32PWR4-NEXT: stfd 2, 72(1)
+; ASM32PWR4-DAG: lwz 4, 64(1)
+; ASM32PWR4-DAG: lwz 5, 68(1)
+; ASM32PWR4-DAG: lwz 7, 72(1)
+; ASM32PWR4-DAG: lwz 8, 76(1)
+; ASM32PWR4-NEXT: bl .test_vararg
+; ASM32PWR4-NEXT: nop
+
+; 64BIT:      renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
+; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
+; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1)
+; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]])
+; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
+; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]])
+; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: $x3 = LI8 42
+; 64BIT-NEXT: $x5 = LI8 42
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:      stdu 1, -128(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
+; ASM64PWR4-NEXT: li 5, 42
+; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2)
+; ASM64PWR4-NEXT: stfd 1, 112(1)
+; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
+; ASM64PWR4-NEXT: li 3, 42
+; ASM64PWR4-NEXT: stfd 2, 120(1)
+; ASM64PWR4-NEXT: ld 4, 112(1)
+; ASM64PWR4-NEXT: ld 6, 120(1)
+; ASM64PWR4-NEXT: bl .test_vararg
+; ASM64PWR4-NEXT: nop
+
+define void @call_test_vararg3() {
+entry:
+  %0 = load float, float* @f1, align 4
+  %conv = fpext float %0 to double
+  %1 = load double, double* @d1, align 8
+  call void (i32, ...) @test_vararg(i32 42, double %conv, i64 42, double %1)
+  ret void
+}
+
+; CHECK-LABEL:     name: call_test_vararg3
+
+; 32BIT:      renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
+; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
+; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
+; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
+; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
+; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
+; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
+; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4)
+; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: $r3 = LI 42
+; 32BIT-NEXT: $r6 = LI 0
+; 32BIT-NEXT: $r7 = LI 42
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit killed $r7, implicit $f2, implicit $r8, implicit $r9, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; ASM32PWR4:      stwu 1, -80(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
+; ASM32PWR4-DAG:  li 6, 0
+; ASM32PWR4-DAG:  li 7, 42
+; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC2(2)
+; ASM32PWR4-NEXT: stfd 1, 64(1)
+; ASM32PWR4-NEXT: lfd 2, 0([[REG]])
+; ASM32PWR4-NEXT: li 3, 42
+; ASM32PWR4-NEXT: stfd 2, 72(1)
+; ASM32PWR4-DAG:  lwz 4, 64(1)
+; ASM32PWR4-DAG:  lwz 5, 68(1)
+; ASM32PWR4-DAG:  lwz 8, 72(1)
+; ASM32PWR4-DAG:  lwz 9, 76(1)
+; ASM32PWR4-NEXT: bl .test_vararg
+; ASM32PWR4-NEXT: nop
+
+; 64BIT:      renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
+; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
+; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1)
+; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]])
+; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
+; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]])
+; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: $x3 = LI8 42
+; 64BIT-NEXT: $x5 = LI8 42
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:      stdu 1, -128(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
+; ASM64PWR4-NEXT: li 5, 42
+; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC2(2)
+; ASM64PWR4-NEXT: stfd 1, 112(1)
+; ASM64PWR4-NEXT: lfd 2, 0([[REG]])
+; ASM64PWR4-NEXT: li 3, 42
+; ASM64PWR4-NEXT: stfd 2, 120(1)
+; ASM64PWR4-DAG: ld 4, 112(1)
+; ASM64PWR4-DAG: ld 6, 120(1)
+; ASM64PWR4-NEXT: bl .test_vararg
+; ASM64PWR4-NEXT: nop
+
+define void @call_test_vararg4() {
+entry:
+  %0 = load float, float* @f1, align 4
+  call void (i32, ...) @test_vararg(i32 42, float %0)
+  ret void
+}
+
+; CHECK-LABEL:     name: call_test_vararg4
+
+; 32BIT:      renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got)
+; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
+; 32BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]])
+; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]])
+; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT-NEXT: $r3 = LI 42
+; 32BIT-NEXT: BL_NOP <mcsymbol .test_vararg>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r2, implicit-def $r1
+; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; ASM32PWR4:      stwu 1, -64(1)
+; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC1(2)
+; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM32PWR4-NEXT: li 3, 42
+; ASM32PWR4-NEXT: stfs 1, 60(1)
+; ASM32PWR4-NEXT: lwz 4, 60(1)
+; ASM32PWR4-NEXT: bl .test_vararg
+; ASM32PWR4-NEXT: nop
+
+; 64BIT:      renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got)
+; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1)
+; 64BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]])
+; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]])
+; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT-NEXT: $x3 = LI8 42
+; 64BIT-NEXT: BL8_NOP <mcsymbol .test_vararg>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $x2, implicit-def $r1
+; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+; ASM64PWR4:      stdu 1, -128(1)
+; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC1(2)
+; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
+; ASM64PWR4-NEXT: li 3, 42
+; ASM64PWR4-NEXT: stfs 1, 124(1)
+; ASM64PWR4-NEXT: lwz 4, 124(1)
+; ASM64PWR4-NEXT: bl .test_vararg
+; ASM64PWR4-NEXT: nop
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll b/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll
@@ -0,0 +1,23 @@
+; RUN: not llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
+
+; This test expects a compiler diagnostic for an AIX limitation on Altivec
+; support.  When the Altivec limitation diagnostic is removed, this test
+; should compile clean and fail in order to alert the author to validate the
+; instructions emitted to initialize the GPR for the double vararg.
+; The mfvsrwz and mfvsrd instructions should be used to initialize the GPR for
+; the double vararg without going through memory.
+
+@f1 = global float 0.000000e+00, align 4
+
+define void @call_test_vararg() {
+entry:
+  %0 = load float, float* @f1, align 4
+  %conv = fpext float %0 to double
+  call void (i32, ...) @test_vararg(i32 42, double %conv, float %0)
+  ret void
+}
+
+declare void @test_vararg(i32, ...)
+
+; CHECK: LLVM ERROR: Altivec support is unimplemented on AIX.