Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -662,6 +662,16 @@
                                   unsigned &NumIntermediates,
                                   MVT &RegisterVT) const;
 
+  /// Certain targets such as MIPS require that some types such as vectors are
+  /// always broken down into scalars in some contexts. This occurs even if the
+  /// vector type is legal.
+  virtual unsigned getVectorTypeBreakdownForCallingConv(
+      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+      unsigned &NumIntermediates, MVT &RegisterVT) const {
+    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT);
+  }
+
   struct IntrinsicInfo {
     unsigned     opc = 0;          // target opcode
     EVT          memVT;            // memory VT
@@ -1002,6 +1012,33 @@
     llvm_unreachable("Unsupported extended type!");
   }
 
+  /// Certain combinations of ABIs, Targets and features require that types
+  /// are legal for some operations and not for other operations.
+  /// For MIPS all vector types must be passed through the integer register set.
+  virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
+    return getRegisterType(VT);
+  }
+
+  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                            EVT VT) const {
+    return getRegisterType(Context, VT);
+  }
+
+  /// Certain targets require unusual breakdowns of certain types. For MIPS,
+  /// this occurs when a vector type is used, as vector are passed through the
+  /// integer register set.
+  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                 EVT VT) const {
+    return getNumRegisters(Context, VT);
+  }
+
+  /// Certain targets have context senstive alignment requirements, where one
+  /// type has the alignment requirement of another type.
+  virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
+                                                 DataLayout DL) const {
+    return DL.getABITypeAlignment(ArgTy);
+  }
+
   /// If true, then instruction selection should seek to shrink the FP constant
   /// of the specified type to a smaller type in order to save space and / or
   /// reduce runtime.
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -973,18 +973,28 @@
   /// expanded value requires multiple registers.
   SmallVector<unsigned, 4> Regs;
 
+  /// This list holds the number of registers for each value.
+  SmallVector<unsigned, 4> RegCount;
+
+  /// Records if this value needs to be treated in an ABI dependant manner,
+  /// different to normal type legalization.
+  bool IsABIMangled;
+
   RegsForValue();
 
-  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+               bool IsABIMangledValue = false);
 
   RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-               const DataLayout &DL, unsigned Reg, Type *Ty);
+               const DataLayout &DL, unsigned Reg, Type *Ty,
+               bool IsABIMangledValue = false);
 
   /// Add the specified values to this one.
   void append(const RegsForValue &RHS) {
     ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
     RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
     Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    RegCount.push_back(RHS.Regs.size());
   }
 
   /// Emit a series of CopyFromReg nodes that copies from this value and returns
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -115,7 +115,8 @@
 
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V);
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy);
 
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
@@ -125,10 +126,11 @@
 static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                 const SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, EVT ValueVT, const Value *V,
-                                Optional<ISD::NodeType> AssertOp = None) {
+                                Optional<ISD::NodeType> AssertOp = None,
+                                bool IsABIRegCopy = false) {
   if (ValueVT.isVector())
     return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
-                                  PartVT, ValueVT, V);
+                                  PartVT, ValueVT, V, IsABIRegCopy);
 
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -272,7 +274,8 @@
 /// ValueVT (ISD::AssertSext).
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V) {
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy) {
   assert(ValueVT.isVector() && "Not a vector value");
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -283,9 +286,18 @@
     EVT IntermediateVT;
     MVT RegisterVT;
     unsigned NumIntermediates;
-    unsigned NumRegs =
-    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
-                               NumIntermediates, RegisterVT);
+    unsigned NumRegs;
+
+    if (IsABIRegCopy) {
+      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+          RegisterVT);
+    } else {
+      NumRegs =
+          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                     NumIntermediates, RegisterVT);
+    }
+
     assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
     NumParts = NumRegs; // Silence a compiler warning.
     assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@@ -314,9 +326,14 @@
 
     // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
     // intermediate operands.
+    EVT BuiltVectorTy =
+        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+                         (IntermediateVT.isVector()
+                              ? IntermediateVT.getVectorNumElements() * NumParts
+                              : NumIntermediates));
     Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                 : ISD::BUILD_VECTOR,
-                      DL, ValueVT, Ops);
+                      DL, BuiltVectorTy, Ops);
   }
 
   // There is now one part, held in Val.  Correct it to match ValueVT.
@@ -355,13 +372,30 @@
       TLI.isTypeLegal(ValueVT))
     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 
-  // Handle cases such as i8 -> <1 x i1>
   if (ValueVT.getVectorNumElements() != 1) {
-    diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
-                                      "non-trivial scalar-to-vector conversion");
+
+    // Certain ABIs require that vectors are passed as integers. For vectors
+    // are the same size, this is an obvious bitcast.
+    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+    } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+      // Bitcast Val back the original type and extract the corresponding
+      // vector we want.
+      unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+      EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+                                          ValueVT.getVectorElementType(), Elts);
+      Val = DAG.getBitcast(WiderVecType, Val);
+      return DAG.getNode(
+          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    }
+
+    diagnosePossiblyInvalidConstraint(
+        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
     return DAG.getUNDEF(ValueVT);
   }
 
+  // Handle cases such as i8 -> <1 x i1>
   if (ValueVT.getVectorNumElements() == 1 &&
       ValueVT.getVectorElementType() != PartEVT)
     Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
@@ -371,7 +405,7 @@
 
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V);
+                                 MVT PartVT, const Value *V, bool IsABIRegCopy);
 
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
@@ -379,12 +413,14 @@
 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                            SDValue *Parts, unsigned NumParts, MVT PartVT,
                            const Value *V,
-                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
+                           bool IsABIRegCopy = false) {
   EVT ValueVT = Val.getValueType();
 
   // Handle the vector case separately.
   if (ValueVT.isVector())
-    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+                                IsABIRegCopy);
 
   unsigned PartBits = PartVT.getSizeInBits();
   unsigned OrigNumParts = NumParts;
@@ -509,7 +545,9 @@
 /// value split into legal parts.
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V) {
+                                 MVT PartVT, const Value *V,
+                                 bool IsABIRegCopy) {
+
   EVT ValueVT = Val.getValueType();
   assert(ValueVT.isVector() && "Not a vector");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -550,15 +588,22 @@
 
       // Promoted vector extract
       Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
-    } else{
+    } else {
       // Vector -> scalar conversion.
-      assert(ValueVT.getVectorNumElements() == 1 &&
-             "Only trivial vector-to-scalar conversions should get here!");
-      Val = DAG.getNode(
-          ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
-          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      if (ValueVT.getVectorNumElements() == 1) {
+        Val = DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
 
-      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+      } else {
+        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+               "lossy conversion of vector to scalar type");
+        EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(),
+                                                 ValueVT.getSizeInBits());
+        Val = DAG.getBitcast(IntermediateType, Val);
+        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+      }
     }
 
     Parts[0] = Val;
@@ -569,15 +614,31 @@
   EVT IntermediateVT;
   MVT RegisterVT;
   unsigned NumIntermediates;
-  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
-                                                IntermediateVT,
-                                                NumIntermediates, RegisterVT);
+  unsigned NumRegs;
+  if (IsABIRegCopy) {
+    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+        RegisterVT);
+  } else {
+    NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                   NumIntermediates, RegisterVT);
+  }
   unsigned NumElements = ValueVT.getVectorNumElements();
 
   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
   NumParts = NumRegs; // Silence a compiler warning.
   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
 
+  // Convert the vector to the appropiate type if necessary.
+  unsigned DestVectorNoElts =
+      NumIntermediates *
+      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+  EVT BuiltVectorTy = EVT::getVectorVT(
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+  if (Val.getValueType() != BuiltVectorTy)
+    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+
   // Split the vector into intermediate operands.
   SmallVector<SDValue, 8> Ops(NumIntermediates);
   for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -610,22 +671,31 @@
   }
 }
 
-RegsForValue::RegsForValue() {}
+RegsForValue::RegsForValue() { IsABIMangled = false; }
 
 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
-                           EVT valuevt)
-    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+                           EVT valuevt, bool IsABIMangledValue)
+    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
 
 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-                           const DataLayout &DL, unsigned Reg, Type *Ty) {
+                           const DataLayout &DL, unsigned Reg, Type *Ty,
+                           bool IsABIMangledValue) {
   ComputeValueVTs(TLI, DL, Ty, ValueVTs);
 
+  IsABIMangled = IsABIMangledValue;
+
   for (EVT ValueVT : ValueVTs) {
-    unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
-    MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+    unsigned NumRegs = IsABIMangledValue
+                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
+                           : TLI.getNumRegisters(Context, ValueVT);
+    MVT RegisterVT = IsABIMangledValue
+                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
+                         : TLI.getRegisterType(Context, ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i)
       Regs.push_back(Reg + i);
     RegVTs.push_back(RegisterVT);
+    RegCount.push_back(NumRegs);
     Reg += NumRegs;
   }
 }
@@ -646,8 +716,10 @@
   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
     // Copy the legal parts from the registers.
     EVT ValueVT = ValueVTs[Value];
-    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumRegs = RegCount[Value];
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];
 
     Parts.resize(NumRegs);
     for (unsigned i = 0; i != NumRegs; ++i) {
@@ -742,9 +814,11 @@
   unsigned NumRegs = Regs.size();
   SmallVector<SDValue, 8> Parts(NumRegs);
   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
-    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumParts = RegCount[Value];
+
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];
 
     if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
       ExtendKind = ISD::ZERO_EXTEND;
@@ -967,10 +1041,16 @@
 
   if (It != FuncInfo.ValueMap.end()) {
     unsigned InReg = It->second;
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
     RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                     DAG.getDataLayout(), InReg, Ty);
+                     DAG.getDataLayout(), InReg, Ty, IsABIRegCopy);
     SDValue Chain = DAG.getEntryNode();
-    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+                                 V);
     resolveDanglingDebugInfo(V, Result);
   }
 
@@ -1157,8 +1237,13 @@
   // If this is an instruction which fast-isel has deferred, select it now.
   if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
     unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
     RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
-                     Inst->getType());
+                     Inst->getType(), IsABIRegCopy);
     SDValue Chain = DAG.getEntryNode();
     return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
   }
@@ -1386,12 +1471,12 @@
         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
           VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
 
-        unsigned NumParts = TLI.getNumRegisters(Context, VT);
-        MVT PartVT = TLI.getRegisterType(Context, VT);
+        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
+        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
         SmallVector<SDValue, 4> Parts(NumParts);
         getCopyToParts(DAG, getCurSDLoc(),
                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, &I, ExtendKind);
+                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
 
         // 'inreg' on function refers to return value
         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -7064,8 +7149,8 @@
 
           SDLoc dl = getCurSDLoc();
           // Use the produced MatchedRegs object to
-          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
-                                    Chain, &Flag, CS.getInstruction());
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+                                    CS.getInstruction());
           MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                            true, OpInfo.getMatchedOperand(), dl,
                                            DAG, AsmNodeOperands);
@@ -7681,8 +7766,10 @@
   } else {
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
       EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags;
         MyFlags.VT = RegisterVT;
@@ -7731,7 +7818,11 @@
       SDValue Op = SDValue(Args[i].Node.getNode(),
                            Args[i].Node.getResNo() + Value);
       ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
 
       if (Args[i].IsZExt)
         Flags.setZExt();
@@ -7786,8 +7877,9 @@
         Flags.setInConsecutiveRegs();
       Flags.setOrigAlign(OriginalAlignment);
 
-      MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumParts =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
       SmallVector<SDValue, 4> Parts(NumParts);
       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
@@ -7817,7 +7909,8 @@
       }
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
-                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
+                     true);
 
       for (unsigned j = 0; j != NumParts; ++j) {
         // if it isn't first piece, alignment must be 1
@@ -7917,12 +8010,14 @@
     unsigned CurReg = 0;
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
       EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
 
       ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
                                               NumRegs, RegisterVT, VT, nullptr,
-                                              AssertOp));
+                                              AssertOp, true));
       CurReg += NumRegs;
     }
 
@@ -7958,8 +8053,15 @@
   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // If this is an InlineAsm we have to match the registers required, not the
+  // notional registers required by the type.
+  bool IsABIRegCopy =
+    V && ((isa<CallInst>(V) &&
+           !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+          isa<ReturnInst>(V));
+
   RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
-                   V->getType());
+                   V->getType(), IsABIRegCopy);
   SDValue Chain = DAG.getEntryNode();
 
   ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8202,7 +8304,12 @@
       EVT VT = ValueVTs[Value];
       Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
       ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment =
+          TLI->getABIAlignmentForCallingConv(ArgTy, DL);
 
       if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
         Flags.setZExt();
@@ -8264,8 +8371,10 @@
       if (ArgCopyElisionCandidates.count(&Arg))
         Flags.setCopyElisionCandidate();
 
-      MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT RegisterVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumRegs =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                               Idx-1, PartBase+i*RegisterVT.getStoreSize());
@@ -8372,8 +8481,10 @@
 
     for (unsigned Val = 0; Val != NumValues; ++Val) {
       EVT VT = ValueVTs[Val];
-      MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT PartVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumParts =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
 
       // Even an apparant 'unused' swifterror argument needs to be returned. So
       // we do generate a copy for it that can be used on return from the
@@ -8386,7 +8497,8 @@
           AssertOp = ISD::AssertZext;
 
         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
-                                             PartVT, VT, nullptr, AssertOp));
+                                             PartVT, VT, nullptr, AssertOp,
+                                             true));
       }
 
       i += NumParts;
Index: llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -835,7 +835,7 @@
       //       completely and make statepoint call to return a tuple.
       unsigned Reg = FuncInfo.CreateRegs(RetTy);
       RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                       DAG.getDataLayout(), Reg, RetTy);
+                       DAG.getDataLayout(), Reg, RetTy, true);
       SDValue Chain = DAG.getEntryNode();
 
       RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
@@ -1616,8 +1616,10 @@
         VT = MinVT;
     }
 
-    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    unsigned NumParts =
+        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+    MVT PartVT =
+        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
 
     // 'inreg' on function refers to return value
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Index: llvm/trunk/lib/Target/Mips/MipsCCState.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCCState.h
+++ llvm/trunk/lib/Target/Mips/MipsCCState.h
@@ -45,16 +45,33 @@
                          const SDNode *CallNode);
 
   /// Identify lowered values that originated from f128 arguments and record
-  /// this.
+  /// this for use by RetCC_MipsN.
   void
   PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
 
+  void PreAnalyzeCallResultForVectorFloat(
+      const SmallVectorImpl<ISD::InputArg> &Ins,
+      const TargetLowering::CallLoweringInfo &CLI);
+
+  void PreAnalyzeFormalArgumentsForVectorFloat(
+      const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  void
+  PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
   /// Records whether the value has been lowered from an f128.
   SmallVector<bool, 4> OriginalArgWasF128;
 
   /// Records whether the value has been lowered from float.
   SmallVector<bool, 4> OriginalArgWasFloat;
 
+  /// Records whether the value has been lowered from a floating point vector.
+  SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+  /// Records whether the return value has been lowered from a floating point
+  /// vector.
+  SmallVector<bool, 4> OriginalRetWasFloatVector;
+
   /// Records whether the value was a fixed argument.
   /// See ISD::OutputArg::IsFixed,
   SmallVector<bool, 4> CallOperandIsFixed;
@@ -78,6 +95,7 @@
     CCState::AnalyzeCallOperands(Outs, Fn);
     OriginalArgWasF128.clear();
     OriginalArgWasFloat.clear();
+    OriginalArgWasFloatVector.clear();
     CallOperandIsFixed.clear();
   }
 
@@ -96,31 +114,38 @@
     CCState::AnalyzeFormalArguments(Ins, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
                          CCAssignFn Fn,
                          const TargetLowering::CallLoweringInfo &CLI) {
     PreAnalyzeCallResultForF128(Ins, CLI);
+    PreAnalyzeCallResultForVectorFloat(Ins, CLI);
     CCState::AnalyzeCallResult(Ins, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
                      CCAssignFn Fn) {
     PreAnalyzeReturnForF128(Outs);
+    PreAnalyzeReturnForVectorFloat(Outs);
     CCState::AnalyzeReturn(Outs, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
                    CCAssignFn Fn) {
     PreAnalyzeReturnForF128(ArgsFlags);
+    PreAnalyzeReturnForVectorFloat(ArgsFlags);
     bool Return = CCState::CheckReturn(ArgsFlags, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
     return Return;
   }
 
@@ -128,6 +153,13 @@
   bool WasOriginalArgFloat(unsigned ValNo) {
       return OriginalArgWasFloat[ValNo];
   }
+  bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+    return OriginalArgWasFloatVector[ValNo];
+  }
+  bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+    return OriginalRetWasFloatVector[ValNo];
+  }
+
   bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
   SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
 };
Index: llvm/trunk/lib/Target/Mips/MipsCCState.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCCState.cpp
+++ llvm/trunk/lib/Target/Mips/MipsCCState.cpp
@@ -54,6 +54,22 @@
   return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
 }
 
+/// Return true if the original type was vXfXX.
+static bool originalEVTTypeIsVectorFloat(EVT Ty) {
+  if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
+    return true;
+
+  return false;
+}
+
+/// Return true if the original type was vXfXX / vXfXX.
+static bool originalTypeIsVectorFloat(Type * Ty) {
+  if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
+    return true;
+
+  return false;
+}
+
 MipsCCState::SpecialCallingConvType
 MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
                                             const MipsSubtarget &Subtarget) {
@@ -81,8 +97,8 @@
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
-/// this for use by RetCC_MipsN.
+/// Identify lowered values that originated from f128 or float arguments and
+/// record this for use by RetCC_MipsN.
 void MipsCCState::PreAnalyzeReturnForF128(
     const SmallVectorImpl<ISD::OutputArg> &Outs) {
   const MachineFunction &MF = getMachineFunction();
@@ -94,26 +110,50 @@
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
+/// Identify lower values that originated from vXfXX and record
+/// this.
+void MipsCCState::PreAnalyzeCallResultForVectorFloat(
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    const TargetLowering::CallLoweringInfo &CLI) {
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    OriginalRetWasFloatVector.push_back(
+        originalTypeIsVectorFloat(CLI.RetTy));
+  }
+}
+
+/// Identify lowered values that originated from vXfXX arguments and record
 /// this.
+void MipsCCState::PreAnalyzeReturnForVectorFloat(
+    const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  for (unsigned i = 0; i < Outs.size(); ++i) {
+    ISD::OutputArg Out = Outs[i];
+    OriginalRetWasFloatVector.push_back(
+        originalEVTTypeIsVectorFloat(Out.ArgVT));
+  }
+}
+/// Identify lowered values that originated from f128, float and sret to vXfXX
+/// arguments and record this.
 void MipsCCState::PreAnalyzeCallOperands(
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     std::vector<TargetLowering::ArgListEntry> &FuncArgs,
     const SDNode *CallNode) {
   for (unsigned i = 0; i < Outs.size(); ++i) {
-    OriginalArgWasF128.push_back(
-        originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
-    OriginalArgWasFloat.push_back(
-        FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+    TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex];
+
+    OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, CallNode));
+    OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy());
+
+    OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy());
     CallOperandIsFixed.push_back(Outs[i].IsFixed);
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
-/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments
+/// and record this.
 void MipsCCState::PreAnalyzeFormalArgumentsForF128(
     const SmallVectorImpl<ISD::InputArg> &Ins) {
   const MachineFunction &MF = getMachineFunction();
+
   for (unsigned i = 0; i < Ins.size(); ++i) {
     Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
 
@@ -123,6 +163,7 @@
     if (Ins[i].Flags.isSRet()) {
       OriginalArgWasF128.push_back(false);
       OriginalArgWasFloat.push_back(false);
+      OriginalArgWasFloatVector.push_back(false);
       continue;
     }
 
@@ -132,5 +173,10 @@
     OriginalArgWasF128.push_back(
         originalTypeIsF128(FuncArg->getType(), nullptr));
     OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+
+    // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the
+    // first argument is actually an SRet pointer to a vector, then the next
+    // argument slot is $a2.
+    OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy());
   }
 }
Index: llvm/trunk/lib/Target/Mips/MipsCallingConv.td
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallingConv.td
+++ llvm/trunk/lib/Target/Mips/MipsCallingConv.td
@@ -37,6 +37,10 @@
 class CCIfArgIsVarArg<CCAction A>
     : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
 
+/// Match if the return was a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+    : CCIf<"!static_cast<MipsCCState *>(&State)"
+                "->WasOriginalRetVectorFloat(ValNo)", A>;
 
 /// Match if the special calling conv is the specified value.
 class CCIfSpecialCallingConv<string CC, CCAction A>
@@ -93,8 +97,10 @@
   // Promote i1/i8/i16 return values to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
 
-  // i32 are returned in registers V0, V1, A0, A1
-  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+  // i32 are returned in registers V0, V1, A0, A1, unless the original return
+  // type was a vector of floats.
+  CCIfOrigArgWasNotVectorFloat<CCIfType<[i32],
+                                        CCAssignToReg<[V0, V1, A0, A1]>>>,
 
   // f32 are returned in registers F0, F2
   CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.h
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.h
@@ -248,6 +248,33 @@
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
 
+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(MVT VT) const override;
+
+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                              EVT VT) const override;
+
+    /// Return the number of registers for a given MVT, ensuring vectors are
+    /// treated as a series of gpr sized integers.
+    virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                   EVT VT) const override;
+
+    /// Break down vectors to the correct number of gpr sized integers.
+    virtual unsigned getVectorTypeBreakdownForCallingConv(
+        LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+        unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+    /// Return the correct alignment for the current calling convention.
+    virtual unsigned
+    getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override {
+      if (ArgTy->isVectorTy())
+        return std::min(DL.getABITypeAlignment(ArgTy), 8U);
+      return DL.getABITypeAlignment(ArgTy);
+    }
+
     ISD::NodeType getExtendForAtomicOps() const override {
       return ISD::SIGN_EXTEND;
     }
Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
@@ -71,6 +71,48 @@
   return true;
 }
 
+// The MIPS MSA ABI passes vector arguments in the integer register set.
+// The number of integer registers used is dependant on the ABI used.
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+  if (VT.isVector() && Subtarget.hasMSA())
+    return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64;
+  return MipsTargetLowering::getRegisterType(VT);
+}
+
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                      EVT VT) const {
+  if (VT.isVector()) {
+      if (Subtarget.isABI_O32()) {
+        return MVT::i32;
+      } else {
+        return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
+      }
+  }
+  return MipsTargetLowering::getRegisterType(Context, VT);
+}
+
+unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+                                                           EVT VT) const {
+  if (VT.isVector())
+    return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+                    1U);
+  return MipsTargetLowering::getNumRegisters(Context, VT);
+}
+
+unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
+    LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+    unsigned &NumIntermediates, MVT &RegisterVT) const {
+
+  // Break down vector types to either 2 i64s or 4 i32s.
+  RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+  IntermediateVT = RegisterVT;
+  NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+                         ? VT.getVectorNumElements()
+                         : VT.getSizeInBits() / RegisterVT.getSizeInBits();
+
+  return NumIntermediates;
+}
+
 SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
   MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
   return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
@@ -2515,6 +2557,11 @@
 //       yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
 //       not used, it must be shadowed. If only A3 is available, shadow it and
 //       go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+//         with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+//         spilling the remainder to the stack.
 //
 //  For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
 //===----------------------------------------------------------------------===//
@@ -2526,8 +2573,13 @@
       State.getMachineFunction().getSubtarget());
 
   static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+
+  const MipsCCState * MipsState = static_cast<MipsCCState *>(&State);
+
   static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
 
+  static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 };
+
   // Do not process byval args here.
   if (ArgFlags.isByVal())
     return true;
@@ -2565,8 +2617,26 @@
                                 State.getFirstUnallocated(F32Regs) != ValNo;
   unsigned OrigAlign = ArgFlags.getOrigAlign();
   bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+  bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);
 
-  if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+  // The MIPS vector ABI for floats passes them in a pair of registers
+  if (ValVT == MVT::i32 && isVectorFloat) {
+    // This is the start of an vector that was scalarized into an unknown number
+    // of components. It doesn't matter how many there are. Allocate one of the
+    // notional 8 byte aligned registers which map onto the argument stack, and
+    // shadow the register lost to alignment requirements.
+    if (ArgFlags.isSplit()) {
+      Reg = State.AllocateReg(FloatVectorIntRegs);
+      if (Reg == Mips::A2)
+        State.AllocateReg(Mips::A1);
+      else if (Reg == 0)
+        State.AllocateReg(Mips::A3);
+    } else {
+      // If we're an intermediate component of the split, we can just attempt to
+      // allocate a register directly.
+      Reg = State.AllocateReg(IntRegs);
+    }
+  } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
     Reg = State.AllocateReg(IntRegs);
     // If this is the first part of an i64 arg,
     // the allocated register must be either A0 or A2.
Index: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp
+++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -283,10 +283,12 @@
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   uint64_t stackSize = MF.getFrameInfo().getStackSize();
   int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+  unsigned alignment = MF.getFrameInfo().getObjectAlignment(FrameIndex);
 
   DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
                << "spOffset   : " << spOffset << "\n"
-               << "stackSize  : " << stackSize << "\n");
+               << "stackSize  : " << stackSize << "\n"
+               << "alignment  : " << alignment << "\n");
 
   eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
 }
Index: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
+++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
@@ -0,0 +1,1657 @@
+; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+
+
+
+; Test that vector types are passed through the integer register set whether or
+; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility
+; we need to handle any power of 2 number of elements. We will test this
+; exhaustively for combinations up to MSA register (128 bits) size.
+
+; First set of tests are for argument passing.
+
+define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
+; ALL-LABEL: i8_2:
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32EL: addu $1, $4, $5
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <2 x i8> %a, %b
+  ret <2 x i8> %1
+}
+
+; Test that vector spilled to the outgoing argument area have the expected
+; offset from $sp.
+
+define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d,
+                        <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+entry:
+
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24
+
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280
+
+; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG; lbu ${{[0-9]+}}, 17($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp)
+
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp)
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48
+
+; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp)
+
+  %0 = add <2 x i8> %a, %b
+  %1 = add <2 x i8> %0, %c
+  %2 = add <2 x i8> %1, %d
+  %3 = add <2 x i8> %2, %e
+  %4 = add <2 x i8> %3, %f
+  %5 = add <2 x i8> %4, %g
+  ret <2 x i8> %5
+}
+
+define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
+; ALL-LABEL: i8_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+  %1 = add <4 x i8> %a, %b
+  ret <4 x i8> %1
+}
+
+define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
+; ALL-LABEL: i8_8:
+; MIPS32-NOT: lw
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <8 x i8> %a, %b
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) {
+; ALL-LABEL: i8_16:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <16 x i8> %a, %b
+
+  ret <16 x i8> %1
+}
+
+define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: i16_2:
+; MIPS32: addu    $[[R0:[0-9]+]], $4, $5
+; MIPS32: andi    $[[R1:[0-9]+]], $[[R0]], 65535
+; MIPS32: srl     $[[R2:[0-9]+]], $5, 16
+; MIPS32: srl     $[[R3:[0-9]+]], $4, 16
+; MIPS32: addu    $[[R4:[0-9]+]], $[[R3]], $[[R2]]
+; MIPS32: sll     $2, $[[R4]], 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+  %1 = add <2 x i16> %a, %b
+  ret <2 x i16> %1
+}
+
+define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
+; ALL-LABEL: i16_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <4 x i16> %a, %b
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) {
+; ALL-LABEL: i16_8:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <8 x i16> %a, %b
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
+; ALL-LABEL: i32_2:
+; MIPS32-DAG: addu    $2, $4, $6
+; MIPS32-DAG: addu    $3, $5, $7
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <2 x i32> %a, %b
+
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: i32_4:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $7, 0
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $7, 32
+  %1 = add <4 x i32> %a, %b
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: i64_2:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: daddu $2, $4, $6
+; MIPS64-DAG: daddu $3, $5, $7
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <2 x i64> %a, %b
+  ret <2 x i64> %1
+}
+
+; The MIPS vector ABI treats vectors of floats differently to vectors of
+; integers.
+
+; For arguments floating pointer vectors are bitcasted to integer vectors whose
+; elements are of GPR width and where the element count is deduced from
+; the length of the floating point vector divided by the size of the GPRs.
+
+; For returns, integer vectors are passed via the GPR register set, but
+; floating point vectors are returned via a hidden sret pointer.
+
+; For testing purposes we skip returning values here and test them below
+; instead.
+@float_res_v2f32 = external global <2 x float>
+
+define void @float_2(<2 x float> %a, <2 x float> %b) {
+; ALL-LABEL: float_2:
+; MIPS32: mtc1 $7, $f[[F0:[0-9]+]]
+; MIPS32: mtc1 $5, $f[[F1:[0-9]+]]
+; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
+; MIPS32: swc1 $f[[F2]]
+; MIPS32: mtc1 $6, $f[[F3:[0-9]+]]
+; MIPS32: mtc1 $4, $f[[F4:[0-9]+]]
+; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]]
+; MIPS32: swc1 $f[[F5]]
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = fadd <2 x float> %a, %b
+  store <2 x float> %1, <2 x float> * @float_res_v2f32
+  ret void
+}
+
+@float_res_v4f32 = external global <4 x float>
+
+; For MSA this case is suboptimal, the 4 loads can be combined into a single
+; ld.w.
+
+define void @float_4(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: float_4:
+; MIPS32-DAG: mtc1 $4
+; MIPS32-DAG: mtc1 $5
+; MIPS32-DAG: mtc1 $6
+; MIPS32-DAG: mtc1 $7
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32
+; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0
+; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = fadd <4 x float> %a, %b
+  store <4 x float> %1, <4 x float> * @float_res_v4f32
+  ret void
+}
+
+@double_v2f64 = external global <2 x double>
+
+define void @double_2(<2 x double> %a, <2 x double> %b) {
+; ALL-LABEL: double_2:
+; MIPS32-DAG: sw $7
+; MIPS32-DAG: sw $6
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32:     add.d
+; MIPS32-DAG: sw $5
+; MIPS32-DAG: sw $4
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32:     add.d
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]]
+; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]]
+; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]]
+; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]]
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = fadd <2 x double> %a, %b
+  store <2 x double> %1, <2 x double> * @double_v2f64
+  ret void
+}
+
+; Return value testing.
+; Integer vectors are returned in $2, $3, $4, $5 for O32, $2, $3 for N32/N64
+; Floating point vectors are returned through a hidden sret pointer.
+
+@gv2i8 = global <2 x i8> <i8 1, i8 2>
+@gv4i8 = global <4 x i8> <i8 0, i8 1, i8 2, i8 3>
+@gv8i8 = global <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+@gv16i8 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+@gv2i16 = global <2 x i16> <i16 1, i16 2>
+@gv4i16 = global <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+@gv8i16 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+
+@gv2i32 = global <2 x i32> <i32 0, i32 1>
+@gv4i32 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+@gv2i64 = global <2 x i64> <i64 0, i64 1>
+
+define <2 x i8> @ret_2_i8() {
+; ALL-LABEL: ret_2_i8:
+; MIPS32-DAG:   lhu $2
+; MIPS32R5-DAG: lhu $2
+
+; FIXME: why is this lh instead of lhu on mips64?
+
+; MIPS64-DAG:  lh $2
+; MIPS64-DAG:  lh $2
+  %1 = load <2 x i8>, <2 x i8> * @gv2i8
+  ret <2 x i8> %1
+}
+
+define <4 x i8> @ret_4_i8() {
+; ALL-LABEL: ret_4_i8:
+; MIPS32-DAG:   lw $2
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG:   lw $2
+; MIPS64R5-DAG: lw $2
+
+  %1 = load <4 x i8>, <4 x i8> * @gv4i8
+  ret <4 x i8> %1
+}
+
+define <8 x i8> @ret_8_i8() {
+; ALL-LABEL: ret_8_i8:
+; MIPS32-DAG:   lw $2
+; MIPS32-DAG:   lw $3
+
+; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+  %1 = load <8 x i8>, <8 x i8> * @gv8i8
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @ret_16_i8() {
+; ALL-LABEL: ret_16_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+  %1 = load <16 x i8>, <16 x i8> * @gv16i8
+  ret <16 x i8> %1
+}
+
+define <2 x i16> @ret_2_i16() {
+; ALL-LABEL: ret_2_i16:
+; MIPS32-DAG:   lw $2
+
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG:   lw $2
+
+; MIPS64R5-DAG: lw $2
+  %1 = load <2 x i16>, <2 x i16> * @gv2i16
+  ret <2 x i16> %1
+}
+
+define <4 x i16> @ret_4_i16() {
+; ALL-LABEL: ret_4_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+  %1 = load <4 x i16>, <4 x i16> * @gv4i16
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @ret_8_i16() {
+; ALL-LABEL: ret_8_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+  %1 = load <8 x i16>, <8 x i16> * @gv8i16
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @ret_2_i32() {
+; ALL-LABEL: ret_2_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+
+  %1 = load <2 x i32>, <2 x i32> * @gv2i32
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @ret_4_i32() {
+; ALL-LABEL: ret_4_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+  %1 = load <4 x i32>, <4 x i32> * @gv4i32
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @ret_2_i64() {
+; ALL-LABEL: ret_2_i64:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+  %1 = load <2 x i64>, <2 x i64> * @gv2i64
+  ret <2 x i64> %1
+}
+
+@gv2f32 = global <2 x float> <float 0.0, float 0.0>
+@gv4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+
+define <2 x float> @ret_float_2() {
+entry:
+; ALL-LABEL: ret_float_2:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS64: ld $2
+
+; MIPS64R5: ld $2
+
+  %0 = load <2 x float>, <2 x float> * @gv2f32
+  ret <2 x float> %0
+}
+
+define <4 x float> @ret_float_4() {
+entry:
+; ALL-LABEL: ret_float_4:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: st.w $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+  %0 = load <4 x float>, <4 x float> * @gv4f32
+  ret <4 x float> %0
+}
+
+@gv2f64 = global <2 x double> <double 0.0, double 0.0>
+
+define <2 x double> @ret_double_2() {
+entry:
+; ALL-LABEL: ret_double_2:
+
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4)
+
+; MIPS32R5: st.d $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $2
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+  %0 = load <2 x double>, <2 x double> * @gv2f64
+  ret <2 x double> %0
+}
+
+; Test argument lowering and call result lowering.
+
+define void @call_i8_2() {
+entry:
+; ALL-LABEL: call_i8_2:
+; MIPS32EB-DAG: addiu $4
+; MIPS32EB-DAG: addiu $5
+; MIPS32-NOT: addiu $6
+; MIPS32-NOT: addiu $7
+
+; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp)
+
+; MIPS32R5: jal
+; MIPS32R5: sw $2, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}})
+; MIPS32R5-DAG; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
+
+; MIPS64EB: daddiu $4, $zero, 1543
+; MIPS64EB: daddiu $5, $zero, 3080
+
+; MIPS64EL: daddiu $4, $zero, 1798
+; MIPS64EL; daddiu $5, $zero, 2060
+
+; MIPS64R5-DAG: lh $4
+; MIPS64R5-DAG: lh $5
+
+; MIPS32: jal i8_2
+; MIPS64: jalr $25
+
+; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16
+; MIPS32EB-DAG: sb $[[R0]]
+; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24
+; MIPS32EB-DAG: sb $[[R1]]
+
+; MIPS32EL: sb $2
+; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8
+; MIPS32EL: sb $[[R0]]
+
+; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48
+; MIPS64EB: sb $[[R4]]
+; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56
+; MIPS64EB: sb $[[R5]]
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0
+; MIPS64EL: sb $[[R6]]
+; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8
+; MIPS64EL: sb $[[R7]]
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x i8> @i8_2(<2 x i8> <i8 6, i8 7>, <2 x i8> <i8 12, i8 8>)
+  store <2 x i8> %0, <2 x i8> * @gv2i8
+  ret void
+}
+
+define void @call_i8_4() {
+entry:
+; ALL-LABEL: call_i8_4:
+; MIPS32: ori $4
+; MIPS32: ori $5
+; MIPS32-NOT: ori $6
+; MIPS32-NOT: ori $7
+
+; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp)
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5: lw $4
+; MIPS64R5: lw $5
+
+; MIPS32: jal i8_4
+; MIPS64: jalr $25
+
+; MIPS32: sw $2
+
+; MIPS32R5-DAG: sw $2
+
+; MIPS64: sw $2
+; MIPS64R5: sw $2
+
+  %0 = call <4 x i8> @i8_4(<4 x i8> <i8 6, i8 7, i8 9, i8 10>, <4 x i8> <i8 12, i8 8, i8 9, i8 10>)
+  store <4 x i8> %0, <4 x i8> * @gv4i8
+  ret void
+}
+
+define void @call_i8_8() {
+entry:
+; ALL-LABEL: call_i8_8:
+
+; MIPS32: ori $6
+; MIPS32: ori $4
+; MIPS32: move  $5
+; MIPS32: move  $7
+
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: move  $5
+; MIPS32R5-DAG: move  $7
+
+; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314
+; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314
+
+; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798
+; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060
+
+; MIPS32: jal i8_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2
+; MIPS32-DAG: sw $3
+
+; MIPS32R5-DAG: sw $2
+; MIPS32R5-DAG: sw $3
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+  %0 = call <8 x i8> @i8_8(<8 x i8> <i8 6, i8 7, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>, <8 x i8> <i8 12, i8 8, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>)
+  store <8 x i8> %0, <8 x i8> * @gv8i8
+  ret void
+}
+
+define void @calli8_16() {
+entry:
+; ALL-LABEL: calli8_16:
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: move  $5, ${{[0-9]+}}
+; MIPS32: move  $6, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $7
+
+; MIPS64R5-DAG: copy_s.d $4
+; MIPS64R5-DAG: copy_s.d $5
+; MIPS64R5-DAG: copy_s.d $6
+; MIPS64R5-DAG: copy_s.d $7
+
+; MIPS32: jal i8_16
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <16 x i8> @i8_16(<16 x i8> <i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7, i8 6, i8 7, i8 9, i8 10>, <16 x i8> <i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 12, i8 8, i8 9, i8 10>)
+  store <16 x i8> %0, <16 x i8> * @gv16i8
+  ret void
+}
+
+define void @calli16_2() {
+entry:
+; ALL-LABEL: calli16_2:
+
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+
+; MIPS32R5-DAG: lw $4
+; MIPS32R5-DAG: lw $5
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5-DAG: lw $4
+; MIPS64R5-DAG: lw $5
+
+; MIPS32: jal i16_2
+; MIPS64: jalr $25
+
+; MIPS32: sw $2, %lo(gv2i16)
+
+; MIPS32R5: sw $2, %lo(gv2i16)
+
+; MIPS64: sw $2
+
+; MIPS64R6: sw $2
+
+  %0 = call <2 x i16> @i16_2(<2 x i16> <i16 6, i16 7>, <2 x i16> <i16 12, i16 8>)
+  store <2 x i16> %0, <2 x i16> * @gv2i16
+  ret void
+}
+
+define void @calli16_4() {
+entry:
+; ALL-LABEL: calli16_4:
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+; MIPS32-DAG: ori $6
+; MIPS32-DAG: move $7
+
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: ori $5
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: move $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i16_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+  %0 = call <4 x i16> @i16_4(<4 x i16> <i16 6, i16 7, i16 9, i16 10>, <4 x i16> <i16 12, i16 8, i16 9, i16 10>)
+  store <4 x i16> %0, <4 x i16> * @gv4i16
+  ret void
+}
+
+define void @calli16_8() {
+entry:
+; ALL-LABEL: calli16_8:
+
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: move  $6, ${{[0-9]+}}
+; MIPS32-DAG: move  $7, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $7
+; MIPS64-DAG: move $5
+; MIPS64-DAG: move $6
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i16_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64: sd $3
+; MIPS64: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W2]][1], $3
+
+  %0 = call <8 x i16> @i16_8(<8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 6, i16 7, i16 9, i16 10>, <8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 12, i16 8, i16 9, i16 10>)
+  store <8 x i16> %0, <8 x i16> * @gv8i16
+  ret void
+}
+
+define void @calli32_2() {
+entry:
+; ALL-LABEL: calli32_2:
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i32_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x i32> @i32_2(<2 x i32> <i32 6, i32 7>, <2 x i32> <i32 12, i32 8>)
+  store <2 x i32> %0, <2 x i32> * @gv2i32
+  ret void
+}
+
+define void @calli32_4() {
+entry:
+; ALL-LABEL: calli32_4:
+
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: move $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i32_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R6-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <4 x i32> @i32_4(<4 x i32> <i32 6, i32 7, i32 9, i32 10>, <4 x i32> <i32 12, i32 8, i32 9, i32 10>)
+  store <4 x i32> %0, <4 x i32> * @gv4i32
+  ret void
+}
+
+define void @calli64_2() {
+entry:
+; ALL-LABEL: calli64_2:
+
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+; MIPS64: daddiu $6
+; MIPS64: daddiu $7
+
+; MIPS64R5: daddiu $4
+; MIPS64R5: daddiu $5
+; MIPS64R5: daddiu $6
+; MIPS64R5: daddiu $7
+
+; MIPS32: jal i64_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R6-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <2 x i64> @i64_2(<2 x i64> <i64 6, i64 7>, <2 x i64> <i64 12, i64 8>)
+  store <2 x i64> %0, <2 x i64> * @gv2i64
+  ret void
+}
+
+declare <2 x float> @float2_extern(<2 x float>, <2 x float>)
+declare <4 x float> @float4_extern(<4 x float>, <4 x float>)
+declare <2 x double> @double2_extern(<2 x double>, <2 x double>)
+
+define void @callfloat_2() {
+entry:
+; ALL-LABEL: callfloat_2:
+
+; MIPS32-DAG: addiu $4, $sp, 24
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: lui $7
+
+; MIPS32R5-DAG: addiu $4, $sp, 24
+; MIPS32R5-DAG: addiu $6, $zero, 0
+; MIPS32R5-DAG: lui $7
+
+; MIPS64: dsll $4
+; MIPS64: dsll $5
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+
+; MIPS32: jal float2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
+  store <2 x float> %0, <2 x float> * @gv2f32
+  ret void
+}
+
+define void @callfloat_4() {
+entry:
+; ALL-LABEL: callfloat_4:
+
+; MIPS32: sw ${{[0-9]+}}, 36($sp)
+; MIPS32: sw ${{[0-9]+}}, 32($sp)
+; MIPS32: sw ${{[0-9]+}}, 28($sp)
+; MIPS32: sw ${{[0-9]+}}, 24($sp)
+; MIPS32: sw ${{[0-9]+}}, 20($sp)
+; MIPS32: sw ${{[0-9]+}}, 16($sp)
+; MIPS32: addiu $4, $sp, 48
+; MIPS32: addiu $6, $zero, 0
+; MIPS32: lui $7
+
+; MIPS32R5: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5: copy_s.w $7, $w{{[0-9]+}}
+; MIPS32R5: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 16($sp)
+; MIPS32R5: addiu $4, $sp, 48
+
+; MIPS64-DAG: dsll $4
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS64: jalr $25
+; MIPS32: jal
+
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp)
+
+; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp)
+
+; MIPS64-DAG: $2
+; MIPS64-DAG: $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <4 x float> @float4_extern(<4 x float> <float 0.0, float -1.0, float 2.0, float 4.0>, <4 x float> <float 12.0, float 14.0, float 15.0, float 16.0>)
+  store <4 x float> %0, <4 x float> * @gv4f32
+  ret void
+}
+
+define void @calldouble_2() {
+entry:
+; ALL-LABEL: calldouble_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]]
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: addiu $7, $zero, 0
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 36($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 32($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+; MIPS64-DAG: daddiu $4
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS32: jal double2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp)
+; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp)
+
+; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}})
+; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}})
+
+; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp)
+; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}})
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <2 x double> @double2_extern(<2 x double> <double 0.0, double -1.0>, <2 x double> <double 12.0, double 14.0>)
+  store <2 x double> %0, <2 x double> * @gv2f64
+  ret void
+}
+
+; The mixed tests show that due to alignment requirements, $5 is not used
+; in argument passing.
+
+define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) {
+entry:
+; ALL-LABEL: mixed_i8:
+
+; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}}
+; MIPS32: andi $[[R7:[0-9]+]], $6, 255
+; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp)
+; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}}
+
+; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+
+; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+
+; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0
+; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255
+; MIPS64R5: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5: sd $6, {{[0-9]+}}($sp)
+
+  %0 = zext i8 %b to i32
+  %1 = uitofp i32 %0 to float
+  %2 = insertelement <2 x float> undef, float %1, i32 0
+  %3 = insertelement <2 x float> %2, float %1, i32 1
+  %4 = fadd <2 x float> %3, %a
+  %5 = fadd <2 x float> %4, %c
+  %6 = extractelement <2 x float> %5, i32 0
+  %7 = extractelement <2 x float> %5, i32 1
+  %8 = fadd float %6, %7
+  ret float %8
+}
+
+define <4 x float> @mixed_32(<4 x float> %a, i32 %b) {
+entry:
+; ALL-LABEL: mixed_32:
+
+; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}}
+; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7
+; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]]
+; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]]
+; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp)
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+; MIPS64-DAG: sll	$[[R6:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4
+; MIPS64R5: insert.d $w[[W0]][1], $5
+; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]]
+
+  %0 = uitofp i32 %b to float
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %2 = insertelement <4 x float> %1, float %0, i32 1
+  %3 = insertelement <4 x float> %2, float %0, i32 2
+  %4 = insertelement <4 x float> %3, float %0, i32 3
+  %5 = fadd <4 x float> %4, %a
+  ret <4 x float> %5
+}
+
+
+; This test is slightly more fragile than I'd like as the offset into the
+; outgoing arguments area is dependant on the size of the stack frame for
+; this function.
+
+define <4 x float> @cast(<4 x i32> %a) {
+entry:
+; ALL-LABEL: cast:
+
+; MIPS32: addiu $sp, $sp, -32
+; MIPS32-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32-DAG: sw $7, {{[0-9]+}}($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp)
+
+; MIPS32R5-DAG: insert.w  $w0[0], $6
+; MIPS32R5-DAG: insert.w  $w0[1], $7
+; MIPS32R5-DAG: lw  $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w  $w0[2], $[[R0]]
+; MIPS32R5-DAG: lw  $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w  $w0[3], $[[R1]]
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d  $w0[0], $4
+; MIPS64R5-DAG: insert.d  $w0[1], $5
+
+  %0 = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %0
+}
+
+define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) {
+entry:
+; ALL-LABEL: select:
+
+; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1
+; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1
+; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
+; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]]
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]]
+; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]]
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32
+; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0
+; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0
+; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1
+; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1
+
+; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1
+; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1
+
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5
+
+  %cond.t = trunc <4 x i32> %cond to <4 x i1>
+  %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2
+  ret <4 x float> %res
+}
Index: llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
+++ llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
@@ -8,10 +8,14 @@
 ; MIPS32: clz     $2, $4
 ; MIPS32: clz     $3, $5
 
-; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
-; MIPS64-DAG: clz $2, $[[A0]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: clz $3, $[[A1]]
+; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0
+; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]]
+; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]]
+; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32
+; MIPS64-DAG: or $2, $[[R3]], $[[R1]]
 
   %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret
Index: llvm/trunk/test/CodeGen/Mips/cttz-v.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/cttz-v.ll
+++ llvm/trunk/test/CodeGen/Mips/cttz-v.ll
@@ -24,14 +24,17 @@
 ; MIPS64-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
 ; MIPS64-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
 ; MIPS64-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
-; MIPS64-DAG: subu    $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: sll     $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $[[A1]], -1
-; MIPS64-DAG: not     $[[R6:[0-9]+]], $[[A1]]
-; MIPS64-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; MIPS64-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
-; MIPS64-DAG: jr      $ra
-; MIPS64-DAG: subu    $3, $[[R4]], $[[R8]]
+; MIPS64-DAG: subu    $[[R5:[0-9]+]], $[[R4]], $[[R3]]
+; MIPS64-DAG: dsrl    $[[R6:[0-9]+]], $4, 32
+; MIPS64-DAG: sll     $[[R7:[0-9]+]], $[[R6]], 0
+; MIPS64-DAG: dext    $[[R8:[0-9]+]], $[[R5]], 0, 32
+; MIPS64-DAG: addiu   $[[R9:[0-9]+]], $[[R7]], -1
+; MIPS64-DAG: not     $[[R10:[0-9]+]], $[[R7]]
+; MIPS64-DAG: and     $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; MIPS64-DAG: clz     $[[R12:[0-9]+]], $[[R11]]
+; MIPS64-DAG: subu    $[[R13:[0-9]+]], $[[R4]], $[[R12]]
+; MIPS64-DAG: dsll    $[[R14:[0-9]+]], $[[R13]], 32
+; MIPS64-DAG: or      $2, $[[R8]], $[[R14]]
 
   %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret
Index: llvm/trunk/test/CodeGen/Mips/return-vector.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/return-vector.ll
+++ llvm/trunk/test/CodeGen/Mips/return-vector.ll
@@ -128,8 +128,11 @@
 
 ; CHECK-LABEL:        call_f2:
 ; CHECK:        call16(f2)
-; CHECK-NOT:    lwc1
-; CHECK:        add.s    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    lwc1 $f[[F0:[0-9]]], [[O0]]($sp)
+; CHECK-DAG:    lwc1 $f[[F1:[0-9]]], 20($sp)
+; CHECK:        add.s    $f0, $f[[F0]], $f[[F1]]
+
 }
 
 
@@ -143,11 +146,12 @@
 
 ; CHECK-LABEL:        call_d2:
 ; CHECK:        call16(d2)
-; CHECK-NOT:    ldc1
-; CHECK:        add.d    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
-}
-
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]+]], 24($sp)
+; CHECK-DAG:    ldc1 $f[[F1:[0-9]+]], [[O0]]($sp)
+; CHECK:        add.d    $f0, $f[[F1]], $f[[F0]]
 
+}
 
 ; Check that function returns vector on stack in cases when vector can't be
 ; returned in registers. Also check that vector is placed on stack starting
@@ -179,11 +183,12 @@
   ret <4 x float> %vecins4
 
 ; CHECK-LABEL:        return_f4:
-; CHECK-DAG:    lwc1    $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK-DAG:    swc1    $[[R0]], 12($4)
+; CHECK-DAG:    lwc1    $f[[R0:[0-9]+]], 16($sp)
+; CHECK-DAG:    swc1    $f[[R0]], 12($4)
 ; CHECK-DAG:    sw      $7, 8($4)
 ; CHECK-DAG:    sw      $6, 4($4)
 ; CHECK-DAG:    sw      $5, 0($4)
+
 }
 
 
@@ -227,8 +232,8 @@
   ret <2 x float> %vecins2
 
 ; CHECK-LABEL:        return_f2:
-; CHECK:        mov.s   $f0, $f12
-; CHECK:        mov.s   $f2, $f14
+; CHECK-DAG:    sw   $5, 0($4)
+; CHECK-DAG:    sw   $6, 4($4)
 }
 
 
@@ -239,6 +244,10 @@
   ret <2 x double> %vecins2
 
 ; CHECK-LABEL:        return_d2:
-; CHECK:        mov.d   $f0, $f12
-; CHECK:        mov.d   $f2, $f14
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]]], 16($sp)
+; CHECK-DAG:    sdc1 $f[[F0]], 8($4)
+; CHECK-DAG:    mtc1 $6, $f[[F1:[0-9]+]]
+; CHECK-DAG:    mtc1 $7, $f
+; CHECK-DAG:    sdc1 $f[[F0]], 0($4)
+
 }