diff --git a/llvm/lib/Target/PowerPC/PPCCCState.h b/llvm/lib/Target/PowerPC/PPCCCState.h
--- a/llvm/lib/Target/PowerPC/PPCCCState.h
+++ b/llvm/lib/Target/PowerPC/PPCCCState.h
@@ -10,6 +10,7 @@
 #define PPCCCSTATE_H

 #include "PPCISelLowering.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"

@@ -36,6 +37,37 @@
   bool WasOriginalArgPPCF128(unsigned ValNo) { return OriginalArgWasPPCF128[ValNo]; }
   void clearWasPPCF128() { OriginalArgWasPPCF128.clear(); }
 };
-}
+
+class AIXCCState : public CCState {
+private:
+  BitVector IsFixed;
+
+public:
+  AIXCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
+             SmallVectorImpl<CCValAssign> &Locs, LLVMContext &C)
+      : CCState(CC, IsVarArg, MF, Locs, C) {}
+
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              CCAssignFn Fn) {
+    // All formal arguments are fixed.
+    IsFixed.resize(Ins.size(), true);
+    CCState::AnalyzeFormalArguments(Ins, Fn);
+  }
+
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           CCAssignFn Fn) {
+    // Record whether the call operand was a fixed argument.
+    IsFixed.resize(Outs.size(), false);
+    for (unsigned ValNo = 0, E = Outs.size(); ValNo != E; ++ValNo)
+      if (Outs[ValNo].IsFixed)
+        IsFixed.set(ValNo);
+
+    CCState::AnalyzeCallOperands(Outs, Fn);
+  }
+
+  bool isFixed(unsigned ValNo) { return IsFixed.test(ValNo); }
+};
+
+} // end namespace llvm

 #endif
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6262,10 +6262,43 @@
                      Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }

+// Returns true when the shadow of a general purpose argument register
+// in the parameter save area is aligned to at least 'RequiredAlign'.
+static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
+  assert(RequiredAlign.value() <= 16 &&
+         "Required alignment greater than stack alignment.");
+  switch (Reg) {
+  default:
+    report_fatal_error("called on invalid register.");
+  case PPC::R5:
+  case PPC::R9:
+  case PPC::X3:
+  case PPC::X5:
+  case PPC::X7:
+  case PPC::X9:
+    // These registers are 16 byte aligned which is the most strict alignment
+    // we can support.
+    return true;
+  case PPC::R3:
+  case PPC::R7:
+  case PPC::X4:
+  case PPC::X6:
+  case PPC::X8:
+  case PPC::X10:
+    // The shadow of these registers in the PSA is 8 byte aligned.
+    return RequiredAlign <= 8;
+  case PPC::R4:
+  case PPC::R6:
+  case PPC::R8:
+  case PPC::R10:
+    return RequiredAlign <= 4;
+  }
+}
+
 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                   CCState &State) {
-
+                   CCState &S) {
+  AIXCCState &State = static_cast<AIXCCState &>(S);
   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
@@ -6397,18 +6430,97 @@
   case MVT::v2i64:
   case MVT::v2f64:
   case MVT::v1i128: {
-    if (State.isVarArg())
-      report_fatal_error(
-          "variadic arguments for vector types are unimplemented for AIX");
+    const unsigned VecSize = 16;
+    const Align VecAlign(VecSize);
+
+    if (!State.isVarArg()) {
+      // If there are vector registers remaining we don't consume any stack
+      // space.
+      if (unsigned VReg = State.AllocateReg(VR)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+        return false;
+      }
+      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
+      // might be allocated in the portion of the PSA that is shadowed by the
+      // GPRs.
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+      return false;
+    }

-    if (unsigned VReg = State.AllocateReg(VR)) {
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+    const unsigned PtrSize = IsPPC64 ? 8 : 4;
+    ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
+
+    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
+    // Burn any underaligned registers and their shadowed stack space until
+    // we reach the required alignment.
+    while (NextRegIndex != GPRs.size() &&
+           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
+      // Shadow allocate register and its stack shadow.
+      unsigned Reg = State.AllocateReg(GPRs);
+      State.AllocateStack(PtrSize, PtrAlign);
+      assert(Reg && "Allocating register unexpectedly failed.");
+      (void)Reg;
+      NextRegIndex = State.getFirstUnallocated(GPRs);
+    }
+
+    // Vectors that are passed as fixed arguments are handled differently.
+    // They are passed in VRs if any are available (unlike arguments passed
+    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
+    // functions).
+    if (State.isFixed(ValNo)) {
+      if (unsigned VReg = State.AllocateReg(VR)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+        // Shadow allocate GPRs and stack space even though we pass in a VR.
+        for (unsigned I = 0; I != VecSize; I += PtrSize)
+          State.AllocateReg(GPRs);
+        State.AllocateStack(VecSize, VecAlign);
+        return false;
+      }
+      // No vector registers remain so pass on the stack.
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
       return false;
     }

-    const unsigned VecSize = 16;
-    const unsigned Offset = State.AllocateStack(VecSize, Align(VecSize));
-    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    // If all GPRs are consumed then we pass the argument fully on the stack.
+    if (NextRegIndex == GPRs.size()) {
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+      return false;
+    }
+
+    // Corner case for 32-bit codegen. We have 2 registers to pass the first
+    // half of the argument, and then need to pass the remaining half on the
+    // stack.
+    if (GPRs[NextRegIndex] == PPC::R9) {
+      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+      State.addLoc(
+          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+      const unsigned FirstReg = State.AllocateReg(PPC::R9);
+      const unsigned SecondReg = State.AllocateReg(PPC::R10);
+      assert(FirstReg && SecondReg &&
+             "Allocating R9 or R10 unexpectedly failed.");
+      State.addLoc(
+          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
+      State.addLoc(
+          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
+      return false;
+    }
+
+    // We have enough GPRs to fully pass the vector argument, and we have
+    // already consumed any underaligned registers. Start with the custom
+    // MemLoc and then the custom RegLocs.
+    const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
+    State.addLoc(
+        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    for (unsigned I = 0; I != VecSize; I += PtrSize) {
+      const unsigned Reg = State.AllocateReg(GPRs);
+      assert(Reg && "Failed to allocate register for vararg vector argument");
+      State.addLoc(
+          CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+    }
     return false;
   }
   }
@@ -6542,7 +6654,7 @@
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

   const EVT PtrVT = getPointerTy(MF.getDataLayout());
   // Reserve space for the linkage area on the stack.
@@ -6555,6 +6667,7 @@
   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
     CCValAssign &VA = ArgLocs[I++];
     MVT LocVT = VA.getLocVT();
+    MVT ValVT = VA.getValVT();
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
@@ -6562,9 +6675,46 @@
     // and memory, however, the callee can choose to expect it in either.
     // The memloc is dismissed here because the argument is retrieved from
     // the register.
-    if (VA.isMemLoc() && VA.needsCustom())
+    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
       continue;

+    auto HandleMemLoc = [&]() {
+      const unsigned LocSize = LocVT.getStoreSize();
+      const unsigned ValSize = ValVT.getStoreSize();
+      assert((ValSize <= LocSize) &&
+             "Object size is larger than size of MemLoc");
+      int CurArgOffset = VA.getLocMemOffset();
+      // Objects are right-justified because AIX is big-endian.
+      if (LocSize > ValSize)
+        CurArgOffset += LocSize - ValSize;
+      // Potential tail calls could cause overwriting of argument stack slots.
+      const bool IsImmutable =
+          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+            (CallConv == CallingConv::Fast));
+      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      SDValue ArgValue =
+          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
+      InVals.push_back(ArgValue);
+    };
+
+    // Vector arguments to VaArg functions are passed both on the stack, and
+    // in any available GPRs. Load the value from the stack and add the GPRs
+    // as live ins.
+    if (VA.isMemLoc() && VA.needsCustom()) {
+      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
+      assert(isVarArg && "Only use custom memloc for vararg.");
+      HandleMemLoc();
+      while (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
+        VA = ArgLocs[I++];
+        assert(VA.getValVT().isVector() &&
+               "Unexpected Val type for custom RegLoc.");
+        MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
+        MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+      }
+      continue;
+    }
+
     if (VA.isRegLoc()) {
       if (VA.getValVT().isScalarInteger())
         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
@@ -6652,9 +6802,8 @@
       continue;
     }

-    EVT ValVT = VA.getValVT();
     if (VA.isRegLoc() && !VA.needsCustom()) {
-      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+      MVT::SimpleValueType SVT = ValVT.SimpleTy;
       unsigned VReg =
           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
@@ -6667,23 +6816,7 @@
       continue;
     }
     if (VA.isMemLoc()) {
-      const unsigned LocSize = LocVT.getStoreSize();
-      const unsigned ValSize = ValVT.getStoreSize();
-      assert((ValSize <= LocSize) &&
-             "Object size is larger than size of MemLoc");
-      int CurArgOffset = VA.getLocMemOffset();
-      // Objects are right-justified because AIX is big-endian.
-      if (LocSize > ValSize)
-        CurArgOffset += LocSize - ValSize;
-      // Potential tail calls could cause overwriting of argument stack slots.
-      const bool IsImmutable =
-          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
-            (CallConv == CallingConv::Fast));
-      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
-      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      SDValue ArgValue =
-          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
-      InVals.push_back(ArgValue);
+      HandleMemLoc();
       continue;
     }
   }
@@ -6764,8 +6897,8 @@
   MachineFunction &MF = DAG.getMachineFunction();
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
-                 *DAG.getContext());
+  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
+                    *DAG.getContext());

   // Reserve space for the linkage save area (LSA) on the stack.
   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
@@ -6934,11 +7067,15 @@
       continue;
     }

+    if (!ValVT.isFloatingPoint())
+      report_fatal_error(
+          "Unexpected register handling for calling convention.");
+
     // Custom handling is used for GPR initializations for vararg float
     // arguments.
     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
-           ValVT.isFloatingPoint() && LocVT.isInteger() &&
-           "Unexpected register handling for calling convention.");
+           LocVT.isInteger() &&
+           "Custom register handling only expected for VarArg.");

     SDValue ArgAsInt =
         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:     FileCheck %s
+
+define <4 x i32> @split_spill(double %d1, double %d2, double %d3, ...) {
+  ; CHECK-LABEL: name: split_spill
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $r9, $r10
+  ; CHECK:   [[COPY:%[0-9]+]]:gprc = COPY $r10
+  ; CHECK:   [[COPY1:%[0-9]+]]:gprc = COPY $r9
+  ; CHECK:   STW [[COPY1]], 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 16)
+  ; CHECK:   STW [[COPY]], 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0
+  ; CHECK:   [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[ADDI]] :: (load 16 from %ir.4)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $v2 = COPY [[LXVW4X]]
+  ; CHECK:   BLR implicit $lr, implicit $rm, implicit $v2
+entry:
+  %arg_list = alloca i8*, align 4
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 4
+  %1 = ptrtoint i8* %argp.cur to i32
+  %2 = add i32 %1, 15
+  %3 = and i32 %2, -16
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16
+  store i8* %argp.next, i8** %arg_list, align 4
+  %4 = inttoptr i32 %3 to <4 x i32>*
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret <4 x i32> %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:     FileCheck %s
+
+define <4 x i32> @callee(i32 %count, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10
+  ; CHECK:   [[COPY:%[0-9]+]]:gprc = COPY $r10
+  ; CHECK:   [[COPY1:%[0-9]+]]:gprc = COPY $r9
+  ; CHECK:   [[COPY2:%[0-9]+]]:gprc = COPY $r8
+  ; CHECK:   [[COPY3:%[0-9]+]]:gprc = COPY $r7
+  ; CHECK:   [[COPY4:%[0-9]+]]:gprc = COPY $r6
+  ; CHECK:   [[COPY5:%[0-9]+]]:gprc = COPY $r5
+  ; CHECK:   [[COPY6:%[0-9]+]]:gprc = COPY $r4
+  ; CHECK:   STW [[COPY6]], 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0)
+  ; CHECK:   STW [[COPY5]], 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4)
+  ; CHECK:   STW [[COPY4]], 8, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY3]], 12, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY2]], 16, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY1]], 20, %fixed-stack.0 :: (store 4)
+  ; CHECK:   STW [[COPY]], 24, %fixed-stack.0 :: (store 4)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0
+  ; CHECK:   STW killed [[ADDI]], 0, %stack.0.arg_list :: (store 4 into %ir.0)
+  ; CHECK:   [[ADDI1:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 15
+  ; CHECK:   [[RLWINM:%[0-9]+]]:gprc = RLWINM killed [[ADDI1]], 0, 0, 27
+  ; CHECK:   [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[RLWINM]] :: (load 16 from %ir.4)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $v2 = COPY [[LXVW4X]]
+  ; CHECK:   BLR implicit $lr, implicit $rm, implicit $v2
+entry:
+  %arg_list = alloca i8*, align 4
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 4
+  %1 = ptrtoint i8* %argp.cur to i32
+  %2 = add i32 %1, 15
+  %3 = and i32 %2, -16
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16
+  store i8* %argp.next, i8** %arg_list, align 4
+  %4 = inttoptr i32 %3 to <4 x i32>*
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret <4 x i32> %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll
@@ -0,0 +1,13 @@
+; RUN: not --crash llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | \
+; RUN:     FileCheck %s
+
+define void @caller() {
+entry:
+  %call = tail call <4 x i32> (double, double, double, ...) @split_spill(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  ret void
+}
+
+declare <4 x i32> @split_spill(double, double, double, ...)
+
+; CHECK: ERROR: Unexpected register handling for calling convention.
diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:     FileCheck %s
+
+define double @callee(i32 %count, <4 x i32> %vsi, double %next, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0
+  ; CHECK:   STW killed [[ADDI]], 0, %stack.0.arg_list :: (store 4 into %ir.0)
+  ; CHECK:   [[ADDI1:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 15
+  ; CHECK:   [[RLWINM:%[0-9]+]]:gprc_and_gprc_nor0 = RLWINM killed [[ADDI1]], 0, 0, 27
+  ; CHECK:   [[ADDI2:%[0-9]+]]:gprc = nuw ADDI killed [[RLWINM]], 16
+  ; CHECK:   [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[ADDI2]] :: (load 8 from %ir.4, align 16)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $f1 = COPY [[XFLOADf64_]]
+  ; CHECK:   BLR implicit $lr, implicit $rm, implicit $f1
+entry:
+  %arg_list = alloca i8*, align 4
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 4
+  %1 = ptrtoint i8* %argp.cur to i32
+  %2 = add i32 %1, 15
+  %3 = and i32 %2, -16
+  %argp.cur.aligned = inttoptr i32 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16
+  %argp.next3 = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 24
+  store i8* %argp.next3, i8** %arg_list, align 4
+  %4 = bitcast i8* %argp.next to double*
+  %5 = load double, double* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret double %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
diff --git a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:     FileCheck %s
+
+define <4 x i32> @callee(i32 signext %count, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x4, $x5, $x6, $x7, $x8, $x9, $x10
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc = COPY $x10
+  ; CHECK:   [[COPY1:%[0-9]+]]:g8rc = COPY $x9
+  ; CHECK:   [[COPY2:%[0-9]+]]:g8rc = COPY $x8
+  ; CHECK:   [[COPY3:%[0-9]+]]:g8rc = COPY $x7
+  ; CHECK:   [[COPY4:%[0-9]+]]:g8rc = COPY $x6
+  ; CHECK:   [[COPY5:%[0-9]+]]:g8rc = COPY $x5
+  ; CHECK:   [[COPY6:%[0-9]+]]:g8rc = COPY $x4
+  ; CHECK:   STD [[COPY6]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0)
+  ; CHECK:   STD [[COPY5]], 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8)
+  ; CHECK:   STD [[COPY4]], 16, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY3]], 24, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY2]], 32, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY1]], 40, %fixed-stack.0 :: (store 8)
+  ; CHECK:   STD [[COPY]], 48, %fixed-stack.0 :: (store 8)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 0
+  ; CHECK:   STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store 8 into %ir.0)
+  ; CHECK:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 15
+  ; CHECK:   [[RLDICR:%[0-9]+]]:g8rc = RLDICR killed [[ADDI8_1]], 0, 59
+  ; CHECK:   [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[RLDICR]] :: (load 16 from %ir.4)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $v2 = COPY [[LXVW4X]]
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $v2
+entry:
+  %arg_list = alloca i8*, align 8
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 8
+  %1 = ptrtoint i8* %argp.cur to i64
+  %2 = add i64 %1, 15
+  %3 = and i64 %2, -16
+  %argp.cur.aligned = inttoptr i64 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 16
+  store i8* %argp.next, i8** %arg_list, align 8
+  %4 = inttoptr i64 %3 to <4 x i32>*
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret <4 x i32> %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
diff --git a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -verify-machineinstrs -stop-before=ppc-vsx-copy -vec-extabi \
+; RUN:     -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:     FileCheck %s
+
+define double @callee(i32 signext %count, <4 x i32> %vsi, double %next, ...) {
+  ; CHECK-LABEL: name: callee
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x8, $x9, $x10
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc = COPY $x10
+  ; CHECK:   [[COPY1:%[0-9]+]]:g8rc = COPY $x9
+  ; CHECK:   [[COPY2:%[0-9]+]]:g8rc = COPY $x8
+  ; CHECK:   STD [[COPY2]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0)
+  ; CHECK:   STD [[COPY1]], 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8)
+  ; CHECK:   STD [[COPY]], 16, %fixed-stack.0 :: (store 8)
+  ; CHECK:   LIFETIME_START %stack.0.arg_list
+  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 0
+  ; CHECK:   STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store 8 into %ir.0)
+  ; CHECK:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 15
+  ; CHECK:   [[RLDICR:%[0-9]+]]:g8rc_and_g8rc_nox0 = RLDICR killed [[ADDI8_1]], 0, 59
+  ; CHECK:   [[LI8_:%[0-9]+]]:g8rc = LI8 16
+  ; CHECK:   [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 killed [[RLDICR]], killed [[LI8_]] :: (load 8 from %ir.4, align 16)
+  ; CHECK:   LIFETIME_END %stack.0.arg_list
+  ; CHECK:   $f1 = COPY [[XFLOADf64_]]
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $f1
+entry:
+  %arg_list = alloca i8*, align 8
+  %0 = bitcast i8** %arg_list to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %arg_list, align 8
+  %1 = ptrtoint i8* %argp.cur to i64
+  %2 = add i64 %1, 15
+  %3 = and i64 %2, -16
+  %argp.cur.aligned = inttoptr i64 %3 to i8*
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 16
+  %argp.next3 = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 24
+  store i8* %argp.next3, i8** %arg_list, align 8
+  %4 = bitcast i8* %argp.next to double*
+  %5 = load double, double* %4, align 16
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret double %5
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)