Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -662,6 +662,16 @@ unsigned &NumIntermediates, MVT &RegisterVT) const; + /// Certain targets such as MIPS require that some types such as vectors are + /// always broken down into scalars in some contexts. This occurs even if the + /// vector type is legal. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, + RegisterVT); + } + struct IntrinsicInfo { unsigned opc = 0; // target opcode EVT memVT; // memory VT @@ -1002,6 +1012,33 @@ llvm_unreachable("Unsupported extended type!"); } + /// Certain combinations of ABIs, Targets and features require that types + /// are legal for some operations and not for other operations. + /// For MIPS all vector types must be passed through the integer register set. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const { + return getRegisterType(VT); + } + + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + return getRegisterType(Context, VT); + } + + /// Certain targets require unusual breakdowns of certain types. For MIPS, + /// this occurs when a vector type is used, as vector are passed through the + /// integer register set. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + return getNumRegisters(Context, VT); + } + + /// Certain targets have context senstive alignment requirements, where one + /// type has the alignment requirement of another type. 
+ virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return DL.getABITypeAlignment(ArgTy); + } + /// If true, then instruction selection should seek to shrink the FP constant /// of the specified type to a smaller type in order to save space and / or /// reduce runtime. Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -973,18 +973,28 @@ /// expanded value requires multiple registers. SmallVector Regs; + /// This list holds the number of registers for each value. + SmallVector RegCount; + + /// Records if this value needs to be treated in an ABI dependent manner, + /// different to normal type legalization. + bool IsABIMangled; + RegsForValue(); - RegsForValue(const SmallVector &regs, MVT regvt, EVT valuevt); + RegsForValue(const SmallVector &regs, MVT regvt, EVT valuevt, + bool IsABIMangledValue = false); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty); + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue = false); /// Add the specified values to this one. 
void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + RegCount.push_back(RHS.Regs.size()); } /// Emit a series of CopyFromReg nodes that copies from this value and returns Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -115,7 +115,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -125,10 +126,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional AssertOp = None) { + Optional AssertOp = None, + bool IsABIRegCopy = false) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V); + PartVT, ValueVT, V, IsABIRegCopy); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -272,7 +274,8 @@ /// ValueVT (ISD::AssertSext). 
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -283,9 +286,18 @@ EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -314,9 +326,14 @@ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + (IntermediateVT.isVector() + ? IntermediateVT.getVectorNumElements() * NumParts + : NumIntermediates)); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, ValueVT, Ops); + DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -355,13 +372,30 @@ TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, - "non-trivial scalar-to-vector conversion"); + + // Certain ABIs require that vectors are passed as integers. 
For vectors + // that are the same size, this is an obvious bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back to the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } + // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartEVT) Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); @@ -371,7 +405,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V); + MVT PartVT, const Value *V, bool IsABIRegCopy); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for @@ -379,12 +413,14 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + ISD::NodeType ExtendKind = ISD::ANY_EXTEND, + bool IsABIRegCopy = false) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. 
if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + IsABIRegCopy); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -509,7 +545,9 @@ /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V) { + MVT PartVT, const Value *V, + bool IsABIRegCopy) { + EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -550,15 +588,22 @@ // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } else{ + } else { // Vector -> scalar conversion. - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + if (ValueVT.getVectorNumElements() == 1) { + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } else { + assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + "lossy conversion of vector to scalar type"); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()); + Val = DAG.getBitcast(IntermediateType, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } } Parts[0] = Val; @@ -569,15 +614,31 @@ EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, 
IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + // Convert the vector to the appropriate type if necessary. + unsigned DestVectorNoElts = + NumIntermediates * + (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1); + EVT BuiltVectorTy = EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + if (Val.getValueType() != BuiltVectorTy) + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -610,22 +671,31 @@ } } -RegsForValue::RegsForValue() {} +RegsForValue::RegsForValue() { IsABIMangled = false; } RegsForValue::RegsForValue(const SmallVector &regs, MVT regvt, - EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + EVT valuevt, bool IsABIMangledValue) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), + RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty) { + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); + IsABIMangled = IsABIMangledValue; + for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = IsABIMangledValue + ? TLI.getNumRegistersForCallingConv(Context, ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = IsABIMangledValue + ? 
TLI.getRegisterTypeForCallingConv(Context, ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); + RegCount.push_back(NumRegs); Reg += NumRegs; } } @@ -646,8 +716,10 @@ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = RegCount[Value]; + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -742,9 +814,11 @@ unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumParts = RegCount[Value]; + + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -967,10 +1041,16 @@ if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty); + DAG.getDataLayout(), InReg, Ty, IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); - Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + V); resolveDanglingDebugInfo(V, Result); } @@ -1157,8 +1237,13 @@ // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType()); + Inst->getType(), IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1386,12 +1471,12 @@ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegisters(Context, VT); - MVT PartVT = TLI.getRegisterType(Context, VT); + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind, true); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -7064,8 +7149,8 @@ SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -7681,8 +7766,10 @@ } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -7731,7 
+7818,11 @@ SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); if (Args[i].IsZExt) Flags.setZExt(); @@ -7786,8 +7877,9 @@ Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumParts = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -7817,7 +7909,8 @@ } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); + CLI.CS ? 
CLI.CS->getInstruction() : nullptr, ExtendKind, + true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7917,12 +8010,14 @@ unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp)); + AssertOp, true)); CurReg += NumRegs; } @@ -7958,8 +8053,15 @@ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType()); + V->getType(), IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8202,7 +8304,12 @@ EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. 
+ unsigned OriginalAlignment = + TLI->getABIAlignmentForCallingConv(ArgTy, DL); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -8264,8 +8371,10 @@ if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, Idx-1, PartBase+i*RegisterVT.getStoreSize()); @@ -8372,8 +8481,10 @@ for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the @@ -8386,7 +8497,8 @@ AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, - PartVT, VT, nullptr, AssertOp)); + PartVT, VT, nullptr, AssertOp, + true)); } i += NumParts; Index: llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -835,7 +835,7 @@ // completely and make statepoint call to return a tuple. 
unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy); + DAG.getDataLayout(), Reg, RetTy, true); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp @@ -1616,8 +1616,10 @@ VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned NumParts = + TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT); + MVT PartVT = + TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Index: llvm/trunk/lib/Target/Mips/MipsCCState.h =================================================================== --- llvm/trunk/lib/Target/Mips/MipsCCState.h +++ llvm/trunk/lib/Target/Mips/MipsCCState.h @@ -45,16 +45,33 @@ const SDNode *CallNode); /// Identify lowered values that originated from f128 arguments and record - /// this. + /// this for use by RetCC_MipsN. void PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); + void PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl &Ins, + const TargetLowering::CallLoweringInfo &CLI); + + void PreAnalyzeFormalArgumentsForVectorFloat( + const SmallVectorImpl &Ins); + + void + PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); + /// Records whether the value has been lowered from an f128. SmallVector OriginalArgWasF128; /// Records whether the value has been lowered from float. SmallVector OriginalArgWasFloat; + /// Records whether the value has been lowered from a floating point vector. 
+ SmallVector OriginalArgWasFloatVector; + + /// Records whether the return value has been lowered from a floating point + /// vector. + SmallVector OriginalRetWasFloatVector; + /// Records whether the value was a fixed argument. /// See ISD::OutputArg::IsFixed, SmallVector CallOperandIsFixed; @@ -78,6 +95,7 @@ CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); + OriginalArgWasFloatVector.clear(); CallOperandIsFixed.clear(); } @@ -96,31 +114,38 @@ CCState::AnalyzeFormalArguments(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeCallResult(const SmallVectorImpl &Ins, CCAssignFn Fn, const TargetLowering::CallLoweringInfo &CLI) { PreAnalyzeCallResultForF128(Ins, CLI); + PreAnalyzeCallResultForVectorFloat(Ins, CLI); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeReturn(const SmallVectorImpl &Outs, CCAssignFn Fn) { PreAnalyzeReturnForF128(Outs); + PreAnalyzeReturnForVectorFloat(Outs); CCState::AnalyzeReturn(Outs, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } bool CheckReturn(const SmallVectorImpl &ArgsFlags, CCAssignFn Fn) { PreAnalyzeReturnForF128(ArgsFlags); + PreAnalyzeReturnForVectorFloat(ArgsFlags); bool Return = CCState::CheckReturn(ArgsFlags, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); return Return; } @@ -128,6 +153,13 @@ bool WasOriginalArgFloat(unsigned ValNo) { return OriginalArgWasFloat[ValNo]; } + bool WasOriginalArgVectorFloat(unsigned ValNo) const { + return OriginalArgWasFloatVector[ValNo]; + } + bool WasOriginalRetVectorFloat(unsigned ValNo) const { + return OriginalRetWasFloatVector[ValNo]; + } + bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } SpecialCallingConvType getSpecialCallingConv() { return 
SpecialCallingConv; } }; Index: llvm/trunk/lib/Target/Mips/MipsCCState.cpp =================================================================== --- llvm/trunk/lib/Target/Mips/MipsCCState.cpp +++ llvm/trunk/lib/Target/Mips/MipsCCState.cpp @@ -54,6 +54,22 @@ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol())); } +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX / vXfXX. +static bool originalTypeIsVectorFloat(Type * Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; +} + MipsCCState::SpecialCallingConvType MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, const MipsSubtarget &Subtarget) { @@ -81,8 +97,8 @@ } } -/// Identify lowered values that originated from f128 arguments and record -/// this for use by RetCC_MipsN. +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_MipsN. void MipsCCState::PreAnalyzeReturnForF128( const SmallVectorImpl &Outs) { const MachineFunction &MF = getMachineFunction(); @@ -94,26 +110,50 @@ } } -/// Identify lowered values that originated from f128 arguments and record +/// Identify lowered values that originated from vXfXX and record +/// this. +void MipsCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl &Ins, + const TargetLowering::CallLoweringInfo &CLI) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back( + originalTypeIsVectorFloat(CLI.RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record /// this. 
+void MipsCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl &Outs, std::vector &FuncArgs, const SDNode *CallNode) { for (unsigned i = 0; i < Outs.size(); ++i) { - OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode)); - OriginalArgWasFloat.push_back( - FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, CallNode)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); CallOperandIsFixed.push_back(Outs[i].IsFixed); } } -/// Identify lowered values that originated from f128 arguments and record -/// this. +/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. 
void MipsCCState::PreAnalyzeFormalArgumentsForF128( const SmallVectorImpl &Ins) { const MachineFunction &MF = getMachineFunction(); + for (unsigned i = 0; i < Ins.size(); ++i) { Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); @@ -123,6 +163,7 @@ if (Ins[i].Flags.isSRet()) { OriginalArgWasF128.push_back(false); OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); continue; } @@ -132,5 +173,10 @@ OriginalArgWasF128.push_back( originalTypeIsF128(FuncArg->getType(), nullptr)); OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the + // first argument is actually an SRet pointer to a vector, then the next + // argument slot is $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); } } Index: llvm/trunk/lib/Target/Mips/MipsCallingConv.td =================================================================== --- llvm/trunk/lib/Target/Mips/MipsCallingConv.td +++ llvm/trunk/lib/Target/Mips/MipsCallingConv.td @@ -37,6 +37,10 @@ class CCIfArgIsVarArg : CCIf<"!static_cast(&State)->IsCallOperandFixed(ValNo)", A>; +/// Match if the return was a floating point vector. +class CCIfOrigArgWasNotVectorFloat + : CCIf<"!static_cast(&State)" + "->WasOriginalRetVectorFloat(ValNo)", A>; /// Match if the special calling conv is the specified value. class CCIfSpecialCallingConv @@ -93,8 +97,10 @@ // Promote i1/i8/i16 return values to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, - // i32 are returned in registers V0, V1, A0, A1 - CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>, + // i32 are returned in registers V0, V1, A0, A1, unless the original return + // type was a vector of floats. 
+ CCIfOrigArgWasNotVectorFloat>>, // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.h =================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelLowering.h +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.h @@ -248,6 +248,33 @@ bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const override; + + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Return the number of registers for a given MVT, ensuring vectors are + /// treated as a series of gpr sized integers. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Break down vectors to the correct number of gpr sized integers. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + + /// Return the correct alignment for the current calling convention. 
+ virtual unsigned + getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override { + if (ArgTy->isVectorTy()) + return std::min(DL.getABITypeAlignment(ArgTy), 8U); + return DL.getABITypeAlignment(ArgTy); + } + ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp @@ -71,6 +71,48 @@ return true; } +// The MIPS MSA ABI passes vector arguments in the integer register set. +// The number of integer registers used is dependent on the ABI used. +MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT.isVector() && Subtarget.hasMSA()) + return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64; + return MipsTargetLowering::getRegisterType(VT); +} + +MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) { + if (Subtarget.isABI_O32()) { + return MVT::i32; + } else { + return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64; + } + } + return MipsTargetLowering::getRegisterType(Context, VT); +} + +unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) + return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)), + 1U); + return MipsTargetLowering::getNumRegisters(Context, VT); +} + +unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + + // Break down vector types to either 2 i64s or 4 i32s. + RegisterVT = getRegisterTypeForCallingConv(Context, VT) ; + IntermediateVT = RegisterVT; + NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits() + ? 
VT.getVectorNumElements() + : VT.getSizeInBits() / RegisterVT.getSizeInBits(); + + return NumIntermediates; +} + SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); @@ -2515,6 +2557,11 @@ // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. +// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. +// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} +// with the remainder spilled to the stack. +// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases +// spilling the remainder to the stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// @@ -2526,8 +2573,13 @@ State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; + + const MipsCCState * MipsState = static_cast(&State); + static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; + static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 }; + // Do not process byval args here. if (ArgFlags.isByVal()) return true; @@ -2565,8 +2617,26 @@ State.getFirstUnallocated(F32Regs) != ValNo; unsigned OrigAlign = ArgFlags.getOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); + bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo); - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { + // The MIPS vector ABI for floats passes them in a pair of registers + if (ValVT == MVT::i32 && isVectorFloat) { + // This is the start of a vector that was scalarized into an unknown number + // of components. It doesn't matter how many there are. 
Allocate one of the + // notional 8 byte aligned registers which map onto the argument stack, and + // shadow the register lost to alignment requirements. + if (ArgFlags.isSplit()) { + Reg = State.AllocateReg(FloatVectorIntRegs); + if (Reg == Mips::A2) + State.AllocateReg(Mips::A1); + else if (Reg == 0) + State.AllocateReg(Mips::A3); + } else { + // If we're an intermediate component of the split, we can just attempt to + // allocate a register directly. + Reg = State.AllocateReg(IntRegs); + } + } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. Index: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp @@ -283,10 +283,12 @@ int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); uint64_t stackSize = MF.getFrameInfo().getStackSize(); int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); + unsigned alignment = MF.getFrameInfo().getObjectAlignment(FrameIndex); DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" - << "stackSize : " << stackSize << "\n"); + << "stackSize : " << stackSize << "\n" + << "alignment : " << alignment << "\n"); eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } Index: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll +++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll @@ -0,0 +1,1657 @@ +; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB +; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB 
+; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB +; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5 +; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL +; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL +; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL +; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5 + + + +; Test that vector types are passed through the integer register set whether or +; not MSA is enabled. This is an ABI requirement for MIPS. For GCC compatibility +; we need to handle any power of 2 number of elements. We will test this +; exhaustively for combinations up to the MSA register size (128 bits). + +; The first set of tests is for argument passing. 
+ +define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { +; ALL-LABEL: i8_2: +; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16 + +; MIPS32EL: addu $1, $4, $5 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 + +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48 + +; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <2 x i8> %a, %b + ret <2 x i8> %1 +} + +; Test that vector spilled to the outgoing argument area have the expected +; offset from $sp. + +define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, + <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) { +entry: + +; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24 + +; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280 +; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280 +; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280 +; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280 + +; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp) +; MIPS32-DAG; lbu ${{[0-9]+}}, 17($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp) + +; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp) + +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp) +; 
MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp) + +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48 + +; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp) + + %0 = add <2 x i8> %a, %b + %1 = add <2 x i8> %0, %c + %2 = add <2 x i8> %1, %d + %3 = add <2 x i8> %2, %e + %4 = add <2 x i8> %3, %f + %5 = add <2 x i8> %4, %g + ret <2 x i8> %5 +} + +define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) { +; ALL-LABEL: i8_4: +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 + +; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0 + + %1 = add <4 x i8> %a, %b + ret <4 x i8> %1 +} + +define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { +; ALL-LABEL: i8_8: +; MIPS32-NOT: lw +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 +; MIPS32-DAG: srl 
${{[0-9]+}}, $5, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <8 x i8> %a, %b + ret <8 x i8> %1 +} + +define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) { +; ALL-LABEL: i8_16: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56 +; 
MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = add <16 x i8> %a, %b + + ret <16 x i8> %1 +} + +define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) { +; ALL-LABEL: i16_2: +; MIPS32: addu $[[R0:[0-9]+]], $4, $5 +; MIPS32: andi $[[R1:[0-9]+]], $[[R0]], 65535 +; MIPS32: srl $[[R2:[0-9]+]], $5, 16 +; MIPS32: srl $[[R3:[0-9]+]], $4, 16 +; MIPS32: addu $[[R4:[0-9]+]], $[[R3]], $[[R2]] +; MIPS32: sll $2, $[[R4]], 16 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 + +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 + +; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0 + + %1 = add <2 x i16> %a, %b + ret <2 x i16> %1 +} + +define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { +; ALL-LABEL: i16_4: +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <4 x i16> %a, %b + ret <4 x i16> %1 +} + +define <8 x i16> @i16_8(<8 x i16> %a, 
<8 x i16> %b) { +; ALL-LABEL: i16_8: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = add <8 x i16> %a, %b + ret <8 x i16> %1 +} + +define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { +; ALL-LABEL: i32_2: +; MIPS32-DAG: addu $2, $4, $6 +; MIPS32-DAG: addu $3, $5, $7 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <2 x i32> %a, %b + + ret <2 x i32> %1 +} + +define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) { +; ALL-LABEL: i32_4: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: addu $2 +; MIPS32-DAG: addu $3 +; MIPS32-DAG: addu $4 +; 
MIPS32-DAG: addu $5 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $7, 0 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32 + %1 = add <4 x i32> %a, %b + ret <4 x i32> %1 +} + +define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) { +; ALL-LABEL: i64_2: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: addu $2 +; MIPS32-DAG: addu $3 +; MIPS32-DAG: addu $4 +; MIPS32-DAG: addu $5 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: daddu $2, $4, $6 +; MIPS64-DAG: daddu $3, $5, $7 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = add <2 x i64> %a, %b + ret <2 x i64> %1 +} + +; The MIPS vector ABI treats vectors of floats differently to vectors of +; integers. 
+ +; For arguments, floating point vectors are bitcast to integer vectors whose +; elements are of GPR width and where the element count is deduced from +; the length of the floating point vector divided by the size of the GPRs. + +; For returns, integer vectors are passed via the GPR register set, but +; floating point vectors are returned via a hidden sret pointer. + +; For testing purposes we skip returning values here and test them below +; instead. +@float_res_v2f32 = external global <2 x float> + +define void @float_2(<2 x float> %a, <2 x float> %b) { +; ALL-LABEL: float_2: +; MIPS32: mtc1 $7, $f[[F0:[0-9]+]] +; MIPS32: mtc1 $5, $f[[F1:[0-9]+]] +; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] +; MIPS32: swc1 $f[[F2]] +; MIPS32: mtc1 $6, $f[[F3:[0-9]+]] +; MIPS32: mtc1 $4, $f[[F4:[0-9]+]] +; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]] +; MIPS32: swc1 $f[[F5]] + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0 +; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32 +; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0 +; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0 +; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}} + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = fadd <2 x float> %a, %b + store <2 x float> %1, <2 x float> * @float_res_v2f32 + ret void +} + +@float_res_v4f32 = external global <4 x float> + +; For MSA this case is suboptimal; the 4 loads could be combined into a single +; ld.w. 
+ +define void @float_4(<4 x float> %a, <4 x float> %b) { +; ALL-LABEL: float_4: +; MIPS32-DAG: mtc1 $4 +; MIPS32-DAG: mtc1 $5 +; MIPS32-DAG: mtc1 $6 +; MIPS32-DAG: mtc1 $7 +; MIPS32-DAG: lwc1 +; MIPS32-DAG: lwc1 +; MIPS32-DAG: lwc1 +; MIPS32-DAG: lwc1 + +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]] +; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]] +; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]] +; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]] + +; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W1]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W1]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W1]][3], $7 + +; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0 +; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32 +; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0 +; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0 +; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}} +; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0 +; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0 +; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32 +; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32 +; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0 +; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0 +; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}} + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = fadd <4 x float> %a, %b + store <4 x float> %1, <4 x float> * @float_res_v4f32 + ret void +} + +@double_v2f64 
= external global <2 x double> + +define void @double_2(<2 x double> %a, <2 x double> %b) { +; ALL-LABEL: double_2: +; MIPS32-DAG: sw $7 +; MIPS32-DAG: sw $6 +; MIPS32-DAG: ldc1 +; MIPS32-DAG: ldc1 +; MIPS32: add.d +; MIPS32-DAG: sw $5 +; MIPS32-DAG: sw $4 +; MIPS32-DAG: ldc1 +; MIPS32-DAG: ldc1 +; MIPS32: add.d + +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]] +; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]] +; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]] +; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]] + +; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W1]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W1]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W1]][3], $7 + +; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]] +; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]] +; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]] +; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]] +; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]] +; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]] + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = fadd <2 x double> %a, %b + store <2 x double> %1, <2 x double> * @double_v2f64 + ret void +} + +; Return value testing. +; Integer vectors are returned in $2, $3, $4, $5 for O32, $2, $3 for N32/N64 +; Floating point vectors are returned through a hidden sret pointer. 
+ +@gv2i8 = global <2 x i8> +@gv4i8 = global <4 x i8> +@gv8i8 = global <8 x i8> +@gv16i8 = global <16 x i8> + +@gv2i16 = global <2 x i16> +@gv4i16 = global <4 x i16> +@gv8i16 = global <8 x i16> + +@gv2i32 = global <2 x i32> +@gv4i32 = global <4 x i32> + +@gv2i64 = global <2 x i64> + +define <2 x i8> @ret_2_i8() { +; ALL-LABEL: ret_2_i8: +; MIPS32-DAG: lhu $2 +; MIPS32R5-DAG: lhu $2 + +; FIXME: why is this lh instead of lhu on mips64? + +; MIPS64-DAG: lh $2 +; MIPS64-DAG: lh $2 + %1 = load <2 x i8>, <2 x i8> * @gv2i8 + ret <2 x i8> %1 +} + +define <4 x i8> @ret_4_i8() { +; ALL-LABEL: ret_4_i8: +; MIPS32-DAG: lw $2 +; MIPS32R5-DAG: lw $2 + +; MIPS64-DAG: lw $2 +; MIPS64R5-DAG: lw $2 + + %1 = load <4 x i8>, <4 x i8> * @gv4i8 + ret <4 x i8> %1 +} + +define <8 x i8> @ret_8_i8() { +; ALL-LABEL: ret_8_i8: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 + +; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]] +; MIPS32R5: copy_s.w $3, $w[[W0]] + +; MIPS64-DAG: ld $2 +; MIPS64R5-DAG: ld $2 + %1 = load <8 x i8>, <8 x i8> * @gv8i8 + ret <8 x i8> %1 +} + +define <16 x i8> @ret_16_i8() { +; ALL-LABEL: ret_16_i8: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2 +; MIPS64R5-DAG: copy_s.d $3 + + %1 = load <16 x i8>, <16 x i8> * @gv16i8 + ret <16 x i8> %1 +} + +define <2 x i16> @ret_2_i16() { +; ALL-LABEL: ret_2_i16: +; MIPS32-DAG: lw $2 + +; MIPS32R5-DAG: lw $2 + +; MIPS64-DAG: lw $2 + +; MIPS64R5-DAG: lw $2 + %1 = load <2 x i16>, <2 x i16> * @gv2i16 + ret <2 x i16> %1 +} + +define <4 x i16> @ret_4_i16() { +; ALL-LABEL: ret_4_i16: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]] + +; MIPS64-DAG: ld $2 +; MIPS64R5-DAG: ld $2 + %1 = load <4 
x i16>, <4 x i16> * @gv4i16 + ret <4 x i16> %1 +} + +define <8 x i16> @ret_8_i16() { +; ALL-LABEL: ret_8_i16: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2 +; MIPS64R5-DAG: copy_s.d $3 + + %1 = load <8 x i16>, <8 x i16> * @gv8i16 + ret <8 x i16> %1 +} + +define <2 x i32> @ret_2_i32() { +; ALL-LABEL: ret_2_i32: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]] + +; MIPS64-DAG: ld $2 +; MIPS64R5-DAG: ld $2 + + %1 = load <2 x i32>, <2 x i32> * @gv2i32 + ret <2 x i32> %1 +} + +define <4 x i32> @ret_4_i32() { +; ALL-LABEL: ret_4_i32: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]] +; MIPS64R5-DAG: copy_s.d $3, $w[[W0]] + + %1 = load <4 x i32>, <4 x i32> * @gv4i32 + ret <4 x i32> %1 +} + +define <2 x i64> @ret_2_i64() { +; ALL-LABEL: ret_2_i64: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]] +; MIPS64R5-DAG: copy_s.d $3, $w[[W0]] + + %1 = load <2 x i64>, <2 x i64> * @gv2i64 + ret <2 x i64> %1 +} + +@gv2f32 = global <2 x float> +@gv4f32 = global <4 x float> + +define <2 x float> @ret_float_2() { +entry: +; 
ALL-LABEL: ret_float_2: + +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) + +; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4) + +; MIPS64: ld $2 + +; MIPS64R5: ld $2 + + %0 = load <2 x float>, <2 x float> * @gv2f32 + ret <2 x float> %0 +} + +define <4 x float> @ret_float_4() { +entry: +; ALL-LABEL: ret_float_4: + +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4) + +; MIPS32R5: st.w $w{{[0-9]+}}, 0($4) + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0] +; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1] + + %0 = load <4 x float>, <4 x float> * @gv4f32 + ret <4 x float> %0 +} + +@gv2f64 = global <2 x double> + +define <2 x double> @ret_double_2() { +entry: +; ALL-LABEL: ret_double_2: + +; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4) +; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4) + +; MIPS32R5: st.d $w{{[0-9]+}}, 0($4) + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $2 + +; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0] +; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1] + + %0 = load <2 x double>, <2 x double> * @gv2f64 + ret <2 x double> %0 +} + +; Test argument lowering and call result lowering. 
+ +define void @call_i8_2() { +entry: +; ALL-LABEL: call_i8_2: +; MIPS32EB-DAG: addiu $4 +; MIPS32EB-DAG: addiu $5 +; MIPS32-NOT: addiu $6 +; MIPS32-NOT: addiu $7 + +; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp) +; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp) + +; MIPS32R5: jal +; MIPS32R5: sw $2, {{[0-9]+}}($sp) + +; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}}) +; MIPS32R5-DAG: sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}}) + +; MIPS64EB: daddiu $4, $zero, 1543 +; MIPS64EB: daddiu $5, $zero, 3080 + +; MIPS64EL: daddiu $4, $zero, 1798 +; MIPS64EL: daddiu $5, $zero, 2060 + +; MIPS64R5-DAG: lh $4 +; MIPS64R5-DAG: lh $5 + +; MIPS32: jal i8_2 +; MIPS64: jalr $25 + +; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16 +; MIPS32EB-DAG: sb $[[R0]] +; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24 +; MIPS32EB-DAG: sb $[[R1]] + +; MIPS32EL: sb $2 +; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8 +; MIPS32EL: sb $[[R0]] + +; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48 +; MIPS64EB: sb $[[R4]] +; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56 +; MIPS64EB: sb $[[R5]] + +; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0 +; MIPS64EL: sb $[[R6]] +; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8 +; MIPS64EL: sb $[[R7]] + +; MIPS64R5: sd $2 + + %0 = call <2 x i8> @i8_2(<2 x i8> , <2 x i8> ) + store <2 x i8> %0, <2 x i8> * @gv2i8 + ret void +} + +define void @call_i8_4() { +entry: +; ALL-LABEL: call_i8_4: +; MIPS32: ori $4 +; MIPS32: ori $5 +; MIPS32-NOT: ori $6 +; MIPS32-NOT: ori $7 + +; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp) +; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp) + +; MIPS64: ori $4 +; MIPS64: ori $5 + +; MIPS64R5: lw $4 +; MIPS64R5: lw $5 + +; MIPS32: jal i8_4 +; MIPS64: jalr $25 + +; MIPS32: sw $2 + +; MIPS32R5-DAG: sw $2 + +; MIPS64: sw $2 +; MIPS64R5: sw $2 + + %0 = call <4 x i8> @i8_4(<4 x i8> , <4 x i8> ) + store <4 x i8> %0, <4 x i8> * @gv4i8 + ret void +} + +define void @call_i8_8() { +entry: +; ALL-LABEL: call_i8_8: + +; MIPS32: ori $6 +; MIPS32: ori $4 +; MIPS32: move $5 +; MIPS32: move $7 + +; MIPS32R5-DAG: ori $6 +; MIPS32R5-DAG: ori $4 +;
MIPS32R5-DAG: move $5 +; MIPS32R5-DAG: move $7 + +; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314 +; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314 + +; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798 +; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060 + +; MIPS32: jal i8_8 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $2 +; MIPS32-DAG: sw $3 + +; MIPS32R5-DAG: sw $2 +; MIPS32R5-DAG: sw $3 + +; MIPS64: sd $2 +; MIPS64R5: sd $2 + + %0 = call <8 x i8> @i8_8(<8 x i8> , <8 x i8> ) + store <8 x i8> %0, <8 x i8> * @gv8i8 + ret void +} + +define void @calli8_16() { +entry: +; ALL-LABEL: calli8_16: +; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32: move $5, ${{[0-9]+}} +; MIPS32: move $6, ${{[0-9]+}} + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $5 +; MIPS64-DAG: daddiu $6 +; MIPS64-DAG: daddiu $7 + +; MIPS64R5-DAG: copy_s.d $4 +; MIPS64R5-DAG: copy_s.d $5 +; MIPS64R5-DAG: copy_s.d $6 +; MIPS64R5-DAG: copy_s.d $7 + +; MIPS32: jal i8_16 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64-DAG: sd $3 +; MIPS64-DAG: sd $2 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <16 
x i8> @i8_16(<16 x i8> , <16 x i8> ) + store <16 x i8> %0, <16 x i8> * @gv16i8 + ret void +} + +define void @calli16_2() { +entry: +; ALL-LABEL: calli16_2: + +; MIPS32-DAG: ori $4 +; MIPS32-DAG: ori $5 + +; MIPS32R5-DAG: lw $4 +; MIPS32R5-DAG: lw $5 + +; MIPS64: ori $4 +; MIPS64: ori $5 + +; MIPS64R5-DAG: lw $4 +; MIPS64R5-DAG: lw $5 + +; MIPS32: jal i16_2 +; MIPS64: jalr $25 + +; MIPS32: sw $2, %lo(gv2i16) + +; MIPS32R5: sw $2, %lo(gv2i16) + +; MIPS64: sw $2 + +; MIPS64R5: sw $2 + + %0 = call <2 x i16> @i16_2(<2 x i16> , <2 x i16> ) + store <2 x i16> %0, <2 x i16> * @gv2i16 + ret void +} + +define void @calli16_4() { +entry: +; ALL-LABEL: calli16_4: +; MIPS32-DAG: ori $4 +; MIPS32-DAG: ori $5 +; MIPS32-DAG: ori $6 +; MIPS32-DAG: move $7 + +; MIPS32R5-DAG: ori $4 +; MIPS32R5-DAG: ori $5 +; MIPS32R5-DAG: ori $6 +; MIPS32R5-DAG: move $7 + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $5 + +; MIPS64R5-DAG: ld $4 +; MIPS64R5-DAG: ld $5 + +; MIPS32: jal i16_4 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}}) + +; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}}) + +; MIPS64: sd $2 +; MIPS64R5: sd $2 + + %0 = call <4 x i16> @i16_4(<4 x i16> , <4 x i16> ) + store <4 x i16> %0, <4 x i16> * @gv4i16 + ret void +} + +define void @calli16_8() { +entry: +; ALL-LABEL: calli16_8: + +; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32-DAG: move $6, ${{[0-9]+}} +; MIPS32-DAG: move $7, ${{[0-9]+}} + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}},
20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $7 +; MIPS64-DAG: move $5 +; MIPS64-DAG: move $6 + +; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1] +; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1] + +; MIPS32: jal i16_8 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64: sd $3 +; MIPS64: sd $2 + +; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W2]][1], $3 + + %0 = call <8 x i16> @i16_8(<8 x i16> , <8 x i16> ) + store <8 x i16> %0, <8 x i16> * @gv8i16 + ret void +} + +define void @calli32_2() { +entry: +; ALL-LABEL: calli32_2: + +; MIPS32-DAG: addiu $4 +; MIPS32-DAG: addiu $5 +; MIPS32-DAG: addiu $6 +; MIPS32-DAG: addiu $7 + +; MIPS32R5-DAG: addiu $4 +; MIPS32R5-DAG: addiu $5 +; MIPS32R5-DAG: addiu $6 +; MIPS32R5-DAG: addiu $7 + +; MIPS64: daddiu $4 +; MIPS64: daddiu $5 + +; MIPS64R5-DAG: ld $4 +; MIPS64R5-DAG: ld $5 + +; MIPS32: jal i32_2 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) + +; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}}) +; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}}) + +; MIPS64: sd $2 + +; MIPS64R5: sd $2 + + %0 = call <2 x i32> @i32_2(<2 x i32> , <2 x i32> ) + store <2 x i32> %0, <2 x i32> * @gv2i32 + ret void +} + +define void @calli32_4() { +entry: +; ALL-LABEL: calli32_4: + +; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32-DAG: addiu $4 +; MIPS32-DAG: addiu $5 +; MIPS32-DAG: 
addiu $6 +; MIPS32-DAG: addiu $7 + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32R5-DAG: addiu $4 +; MIPS32R5-DAG: addiu $5 +; MIPS32R5-DAG: addiu $6 +; MIPS32R5-DAG: addiu $7 + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $6 +; MIPS64-DAG: daddiu $5 +; MIPS64-DAG: move $7 + +; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1] +; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1] + +; MIPS32: jal i32_4 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64-DAG: sd $2 +; MIPS64-DAG: sd $3 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <4 x i32> @i32_4(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32> * @gv4i32 + ret void +} + +define void @calli64_2() { +entry: +; ALL-LABEL: calli64_2: + +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp) + +; MIPS32-DAG: addiu $4 +; MIPS32-DAG: addiu $5 +; MIPS32-DAG: addiu $6 +; MIPS32-DAG: addiu $7 + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64: daddiu $4 +; MIPS64: daddiu $5 +; MIPS64: daddiu $6 +;
MIPS64: daddiu $7 + +; MIPS64R5: daddiu $4 +; MIPS64R5: daddiu $5 +; MIPS64R5: daddiu $6 +; MIPS64R5: daddiu $7 + +; MIPS32: jal i64_2 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64-DAG: sd $3 +; MIPS64-DAG: sd $2 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <2 x i64> @i64_2(<2 x i64> , <2 x i64> ) + store <2 x i64> %0, <2 x i64> * @gv2i64 + ret void +} + +declare <2 x float> @float2_extern(<2 x float>, <2 x float>) +declare <4 x float> @float4_extern(<4 x float>, <4 x float>) +declare <2 x double> @double2_extern(<2 x double>, <2 x double>) + +define void @callfloat_2() { +entry: +; ALL-LABEL: callfloat_2: + +; MIPS32-DAG: addiu $4, $sp, 24 +; MIPS32-DAG: addiu $6, $zero, 0 +; MIPS32-DAG: lui $7 + +; MIPS32R5-DAG: addiu $4, $sp, 24 +; MIPS32R5-DAG: addiu $6, $zero, 0 +; MIPS32R5-DAG: lui $7 + +; MIPS64: dsll $4 +; MIPS64: dsll $5 + +; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}} + +; MIPS32: jal float2_extern +; MIPS64: jalr $25 + +; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp) +; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp) + +; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}}) +; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}}) + +; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp) +; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp) + +; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}}) +; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}}) + +; MIPS64: sd $2 + +; MIPS64R5: sd $2 + + %0 = call <2 x float> @float2_extern(<2 x float> , <2 x float> ) + store <2 x float> %0, <2 x float> * @gv2f32 + ret void +} + +define void @callfloat_4()
{ +entry: +; ALL-LABEL: callfloat_4: + +; MIPS32: sw ${{[0-9]+}}, 36($sp) +; MIPS32: sw ${{[0-9]+}}, 32($sp) +; MIPS32: sw ${{[0-9]+}}, 28($sp) +; MIPS32: sw ${{[0-9]+}}, 24($sp) +; MIPS32: sw ${{[0-9]+}}, 20($sp) +; MIPS32: sw ${{[0-9]+}}, 16($sp) +; MIPS32: addiu $4, $sp, 48 +; MIPS32: addiu $6, $zero, 0 +; MIPS32: lui $7 + +; MIPS32R5: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5: copy_s.w $7, $w{{[0-9]+}} +; MIPS32R5: sw ${{[0-9]+}}, 36($sp) +; MIPS32R5: sw ${{[0-9]+}}, 32($sp) +; MIPS32R5: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5: sw ${{[0-9]+}}, 16($sp) +; MIPS32R5: addiu $4, $sp, 48 + +; MIPS64-DAG: dsll $4 +; MIPS64-DAG: dsll $5 +; MIPS64-DAG: dsll $6 +; MIPS64-DAG: dsll $7 + +; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}} + +; MIPS64: jalr $25 +; MIPS32: jal + +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp) + +; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp) + +; MIPS64-DAG: sd $2 +; MIPS64-DAG: sd $3 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <4 x float> @float4_extern(<4 x float> , <4 x float> ) + store <4 x float> %0, <4 x float> * @gv4f32 + ret void +} + +define void @calldouble_2() { +entry: +; ALL-LABEL: calldouble_2: + +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp) + +; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]] +; MIPS32-DAG: addiu $6, $zero, 0 +; MIPS32-DAG: addiu $7, $zero, 0 + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6,
$w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 36($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 32($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64-DAG: dsll $5 +; MIPS64-DAG: dsll $6 +; MIPS64-DAG: dsll $7 +; MIPS64-DAG: daddiu $4 + +; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}} + +; MIPS32: jal double2_extern +; MIPS64: jalr $25 + +; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp) +; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp) + +; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}}) +; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}}) + +; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp) +; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}}) + +; MIPS64-DAG: sd $2 +; MIPS64-DAG: sd $3 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <2 x double> @double2_extern(<2 x double> , <2 x double> ) + store <2 x double> %0, <2 x double> * @gv2f64 + ret void +} + +; The mixed tests show that due to alignment requirements, $5 is not used +; in argument passing. 
+ +define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) { +entry: +; ALL-LABEL: mixed_i8: + +; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}} +; MIPS32: andi $[[R7:[0-9]+]], $6, 255 +; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]] +; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]] + +; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}} +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp) +; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}} + +; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255 +; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp) +; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp) + +; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0 +; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255 +; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]] +; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]] + +; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32 +; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 +; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0 +; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}} +; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32 +; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 +; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32 +; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 +; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0 +; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255 +; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]] +; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]] + +; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32 +; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 +; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0 +; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}} + +; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0 +; MIPS64R5: andi 
$[[R1:[0-9]+]], $[[R0]], 255 +; MIPS64R5: sd $4, {{[0-9]+}}($sp) +; MIPS64R5: sd $6, {{[0-9]+}}($sp) + + %0 = zext i8 %b to i32 + %1 = uitofp i32 %0 to float + %2 = insertelement <2 x float> undef, float %1, i32 0 + %3 = insertelement <2 x float> %2, float %1, i32 1 + %4 = fadd <2 x float> %3, %a + %5 = fadd <2 x float> %4, %c + %6 = extractelement <2 x float> %5, i32 0 + %7 = extractelement <2 x float> %5, i32 1 + %8 = fadd float %6, %7 + ret float %8 +} + +define <4 x float> @mixed_32(<4 x float> %a, i32 %b) { +entry: +; ALL-LABEL: mixed_32: + +; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}} +; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}} +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4) + +; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6 +; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7 +; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]] +; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]] +; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp) + +; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0 +; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0 +; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}} +; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0 +; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0 +; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}} +; MIPS64-DAG: sll $[[R6:[0-9]+]], $5, 0 +; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}} + +; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4 +; MIPS64R5: insert.d $w[[W0]][1], $5 +; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0 +; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]] + + %0 = uitofp i32 %b to float + %1 = insertelement <4 x float> undef, float %0, i32 0 + %2 = insertelement <4 x float> %1, float %0, i32 1 + %3 = insertelement <4 x float> 
%2, float %0, i32 2 + %4 = insertelement <4 x float> %3, float %0, i32 3 + %5 = fadd <4 x float> %4, %a + ret <4 x float> %5 +} + + +; This test is slightly more fragile than I'd like as the offset into the +; outgoing arguments area is dependent on the size of the stack frame for +; this function. + +define <4 x float> @cast(<4 x i32> %a) { +entry: +; ALL-LABEL: cast: + +; MIPS32: addiu $sp, $sp, -32 +; MIPS32-DAG: sw $6, {{[0-9]+}}($sp) +; MIPS32-DAG: sw $7, {{[0-9]+}}($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp) + +; MIPS32R5-DAG: insert.w $w0[0], $6 +; MIPS32R5-DAG: insert.w $w0[1], $7 +; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32R5-DAG: insert.w $w0[2], $[[R0]] +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w0[3], $[[R1]] + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 + +; MIPS64R5-DAG: insert.d $w0[0], $4 +; MIPS64R5-DAG: insert.d $w0[1], $5 + + %0 = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %0 +} + +define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) { +entry: +; ALL-LABEL: select: + +; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1 +; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1 +; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1 +; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32-DAG: andi ${{[0-9]+}}, $[[R1]], 1 + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $7 +; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]] +; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]] +; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]] + +; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0 +; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32 +; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 +; MIPS64-DAG: mtc1
$[[R2]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0 +; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32 +; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 +; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0 +; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32 +; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0 +; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0 +; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32 +; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0 +; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1 +; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1 + +; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1 +; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1 + +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5 + + %cond.t = trunc <4 x i32> %cond to <4 x i1> + %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2 + ret <4 x float> %res +} Index: llvm/trunk/test/CodeGen/Mips/ctlz-v.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/ctlz-v.ll +++ llvm/trunk/test/CodeGen/Mips/ctlz-v.ll @@ -8,10 +8,14 @@ ; MIPS32: clz $2, $4 ; MIPS32: clz $3, $5 -; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0 -; MIPS64-DAG: clz $2, $[[A0]] -; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0 -; MIPS64-DAG: clz $3, $[[A1]] +; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32 +; 
MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0 +; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]] +; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32 +; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0 +; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]] +; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32 +; MIPS64-DAG: or $2, $[[R3]], $[[R1]] %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true) ret <2 x i32> %ret Index: llvm/trunk/test/CodeGen/Mips/cttz-v.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/cttz-v.ll +++ llvm/trunk/test/CodeGen/Mips/cttz-v.ll @@ -24,14 +24,17 @@ ; MIPS64-DAG: and $[[R2:[0-9]+]], $[[R1]], $[[R0]] ; MIPS64-DAG: clz $[[R3:[0-9]+]], $[[R2]] ; MIPS64-DAG: addiu $[[R4:[0-9]+]], $zero, 32 -; MIPS64-DAG: subu $2, $[[R4]], $[[R3]] -; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0 -; MIPS64-DAG: addiu $[[R5:[0-9]+]], $[[A1]], -1 -; MIPS64-DAG: not $[[R6:[0-9]+]], $[[A1]] -; MIPS64-DAG: and $[[R7:[0-9]+]], $[[R6]], $[[R5]] -; MIPS64-DAG: clz $[[R8:[0-9]+]], $[[R7]] -; MIPS64-DAG: jr $ra -; MIPS64-DAG: subu $3, $[[R4]], $[[R8]] +; MIPS64-DAG: subu $[[R5:[0-9]+]], $[[R4]], $[[R3]] +; MIPS64-DAG: dsrl $[[R6:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[R7:[0-9]+]], $[[R6]], 0 +; MIPS64-DAG: dext $[[R8:[0-9]+]], $[[R5]], 0, 32 +; MIPS64-DAG: addiu $[[R9:[0-9]+]], $[[R7]], -1 +; MIPS64-DAG: not $[[R10:[0-9]+]], $[[R7]] +; MIPS64-DAG: and $[[R11:[0-9]+]], $[[R10]], $[[R9]] +; MIPS64-DAG: clz $[[R12:[0-9]+]], $[[R11]] +; MIPS64-DAG: subu $[[R13:[0-9]+]], $[[R4]], $[[R12]] +; MIPS64-DAG: dsll $[[R14:[0-9]+]], $[[R13]], 32 +; MIPS64-DAG: or $2, $[[R8]], $[[R14]] %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true) ret <2 x i32> %ret Index: llvm/trunk/test/CodeGen/Mips/return-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/return-vector.ll +++ llvm/trunk/test/CodeGen/Mips/return-vector.ll @@ -128,8 +128,11 @@ ; CHECK-LABEL: call_f2: ; CHECK: call16(f2) -; CHECK-NOT: lwc1 -; 
CHECK: add.s $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]] +; CHECK: addiu $4, $sp, [[O0:[0-9]+]] +; CHECK-DAG: lwc1 $f[[F0:[0-9]]], [[O0]]($sp) +; CHECK-DAG: lwc1 $f[[F1:[0-9]]], 20($sp) +; CHECK: add.s $f0, $f[[F0]], $f[[F1]] + } @@ -143,11 +146,12 @@ ; CHECK-LABEL: call_d2: ; CHECK: call16(d2) -; CHECK-NOT: ldc1 -; CHECK: add.d $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]] -} - +; CHECK: addiu $4, $sp, [[O0:[0-9]+]] +; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 24($sp) +; CHECK-DAG: ldc1 $f[[F1:[0-9]+]], [[O0]]($sp) +; CHECK: add.d $f0, $f[[F1]], $f[[F0]] +} ; Check that function returns vector on stack in cases when vector can't be ; returned in registers. Also check that vector is placed on stack starting @@ -179,11 +183,12 @@ ret <4 x float> %vecins4 ; CHECK-LABEL: return_f4: -; CHECK-DAG: lwc1 $[[R0:[a-z0-9]+]], 16($sp) -; CHECK-DAG: swc1 $[[R0]], 12($4) +; CHECK-DAG: lwc1 $f[[R0:[0-9]+]], 16($sp) +; CHECK-DAG: swc1 $f[[R0]], 12($4) ; CHECK-DAG: sw $7, 8($4) ; CHECK-DAG: sw $6, 4($4) ; CHECK-DAG: sw $5, 0($4) + } @@ -227,8 +232,8 @@ ret <2 x float> %vecins2 ; CHECK-LABEL: return_f2: -; CHECK: mov.s $f0, $f12 -; CHECK: mov.s $f2, $f14 +; CHECK-DAG: sw $5, 0($4) +; CHECK-DAG: sw $6, 4($4) } @@ -239,6 +244,10 @@ ret <2 x double> %vecins2 ; CHECK-LABEL: return_d2: -; CHECK: mov.d $f0, $f12 -; CHECK: mov.d $f2, $f14 +; CHECK-DAG: ldc1 $f[[F0:[0-9]]], 16($sp) +; CHECK-DAG: sdc1 $f[[F0]], 8($4) +; CHECK-DAG: mtc1 $6, $f[[F1:[0-9]+]] +; CHECK-DAG: mtc1 $7, $f +; CHECK-DAG: sdc1 $f[[F1]], 0($4) + }