Index: llvm/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -102,29 +102,41 @@
   if (!VT.isVector())
     return getRegisterType(Context, VT);
 
-  return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
-                                                           : MVT::i64;
+  // Pow2 vectors with round (pow2-byte-sized) elements are passed in
+  // GPR-sized integer pieces; any other vector is passed per-element.
+  if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+    return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
+                                                             : MVT::i64;
+  return getRegisterType(Context, VT.getVectorElementType());
 }
 
 unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                            CallingConv::ID CC,
                                                            EVT VT) const {
-  if (VT.isVector())
-    return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+  if (VT.isVector()) {
+    if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+      return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+    return VT.getVectorNumElements() *
+           getNumRegisters(Context, VT.getVectorElementType());
+  }
   return MipsTargetLowering::getNumRegisters(Context, VT);
 }
 
 unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
     unsigned &NumIntermediates, MVT &RegisterVT) const {
-  // Break down vector types to either 2 i64s or 4 i32s.
-  RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
-  IntermediateVT = RegisterVT;
-  NumIntermediates =
-      VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits()
-          ? VT.getVectorNumElements()
-          : divideCeil(VT.getSizeInBits(), RegisterVT.getSizeInBits());
-  return NumIntermediates;
+  // Pow2 vectors use the register type and count chosen by the hooks above;
+  // non-pow2 vectors are broken into one piece per element.
+  if (VT.isPow2VectorType()) {
+    IntermediateVT = getRegisterTypeForCallingConv(Context, CC, VT);
+    RegisterVT = IntermediateVT.getSimpleVT();
+    NumIntermediates = getNumRegistersForCallingConv(Context, CC, VT);
+    return NumIntermediates;
+  }
+  IntermediateVT = VT.getVectorElementType();
+  NumIntermediates = VT.getVectorNumElements();
+  RegisterVT = getRegisterType(Context, IntermediateVT);
+  return NumIntermediates * getNumRegisters(Context, IntermediateVT);
 }
 
 SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
Index: llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll
@@ -0,0 +1,1809 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=mips64 < %s | FileCheck %s --check-prefix=MIPS64
+; RUN: llc -mtriple=mips < %s | FileCheck %s --check-prefix=MIPS32
+
+define void @arg_v1i32(<1 x i32> %vec, ptr %p) {
+; MIPS64-LABEL: arg_v1i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    sw $4, 0($5)
+;
+; MIPS32-LABEL: arg_v1i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    sw $4, 0($5)
+  store <1 x i32> %vec, ptr %p
+  ret void
+}
+
+define <1 x i32> @ret_v1i32(ptr %p) {
+; MIPS64-LABEL: ret_v1i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    lw $2, 0($4)
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; MIPS32-LABEL: ret_v1i32:
+; MIPS32:       # %bb.0:
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+  %v = load <1 x i32>, ptr %p
+  ret <1 x i32> %v
+}
+
+define void @call_v1i32(ptr %p) nounwind {
+; MIPS64-LABEL: call_v1i32:
+; MIPS64:       # %bb.0:
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
+; MIPS64-NEXT:    move $16, 
$4 +; MIPS64-NEXT: lw $4, 0($4) +; MIPS64-NEXT: jal arg_v1i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: jal ret_v1i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: sw $2, 0($16) +; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-LABEL: call_v1i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: jal arg_v1i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: jal ret_v1i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: sw $2, 0($16) +; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 + %v1 = load <1 x i32>, ptr %p + call void @arg_v1i32(<1 x i32> %v1) + %v2 = call <1 x i32> @ret_v1i32() + store <1 x i32> %v2, ptr %p + ret void +} + +define void @arg_v2i32(<2 x i32> %vec, ptr %p) { +; MIPS64-LABEL: arg_v2i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $4, 0($5) +; +; MIPS32-LABEL: arg_v2i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: sw $5, 4($6) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $4, 0($6) + store <2 x i32> %vec, ptr %p + ret void +} + +define <2 x i32> @ret_v2i32(ptr %p) { +; MIPS64-LABEL: ret_v2i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: ld $2, 0($4) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS32-LABEL: ret_v2i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $3, 4($4) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop + %v = load <2 x i32>, ptr %p + ret <2 x i32> %v +} + +define void @call_v2i32(ptr %p) nounwind { +; MIPS64-LABEL: call_v2i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: ld $4, 0($4) +; MIPS64-NEXT: jal arg_v2i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: jal ret_v2i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: sd $2, 0($16) +; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-LABEL: call_v2i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: jal arg_v2i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: jal ret_v2i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: sw $3, 4($16) +; MIPS32-NEXT: sw $2, 0($16) +; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 + %v1 = load <2 x i32>, ptr %p + call void @arg_v2i32(<2 x i32> %v1) + %v2 = call <2 x i32> @ret_v2i32() + store <2 x i32> %v2, ptr %p + ret void +} + +define <3 x i32> @arg_v3i32(<3 x i32> %vec, ptr %p) { +; MIPS64-LABEL: arg_v3i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $1, $zero, 1 +; MIPS64-NEXT: dsll $1, $1, 32 +; MIPS64-NEXT: daddiu $2, $1, -1 +; MIPS64-NEXT: sll $1, $6, 0 +; MIPS64-NEXT: sw $1, 8($7) +; MIPS64-NEXT: and $2, $5, $2 +; MIPS64-NEXT: dsll $3, $4, 32 +; MIPS64-NEXT: or $2, $2, $3 +; MIPS64-NEXT: sd $2, 0($7) +; MIPS64-NEXT: sll $2, $4, 0 +; MIPS64-NEXT: sll $3, $5, 0 +; MIPS64-NEXT: jr $ra +; 
MIPS64-NEXT: move $4, $1 +; +; MIPS32-LABEL: arg_v3i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: sw $6, 8($7) +; MIPS32-NEXT: sw $5, 4($7) +; MIPS32-NEXT: sw $4, 0($7) +; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: move $3, $5 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: move $4, $6 + store <3 x i32> %vec, ptr %p + ret <3 x i32> %vec +} + +define <3 x i32> @ret_v3i32(ptr %p) { +; MIPS64-LABEL: ret_v3i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lw $1, 8($4) +; MIPS64-NEXT: ld $2, 0($4) +; MIPS64-NEXT: sll $3, $2, 0 +; MIPS64-NEXT: dsrl $2, $2, 32 +; MIPS64-NEXT: sll $2, $2, 0 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: move $4, $1 +; +; MIPS32-LABEL: ret_v3i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $3, 4($4) +; MIPS32-NEXT: lw $4, 8($4) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop + %v = load <3 x i32>, ptr %p + ret <3 x i32> %v +} + +define void @call_v3i32(ptr %p) nounwind { +; MIPS64-LABEL: call_v3i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: lw $6, 8($4) +; MIPS64-NEXT: ld $5, 0($4) +; MIPS64-NEXT: jal arg_v3i32 +; MIPS64-NEXT: dsrl $4, $5, 32 +; MIPS64-NEXT: jal ret_v3i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; MIPS64-NEXT: sw $4, 8($16) +; MIPS64-NEXT: dsll $1, $2, 32 +; MIPS64-NEXT: dsll $2, $3, 32 +; MIPS64-NEXT: dsrl $2, $2, 32 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: sd $1, 0($16) +; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-LABEL: call_v3i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $6, 8($4) +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: jal arg_v3i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: jal ret_v3i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: sw $4, 8($16) +; MIPS32-NEXT: sw $3, 4($16) +; MIPS32-NEXT: sw $2, 0($16) +; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 + %v1 = load <3 x i32>, ptr %p + call void @arg_v3i32(<3 x i32> %v1) + %v2 = call <3 x i32> @ret_v3i32() + store <3 x i32> %v2, ptr %p + ret void +} + +define void @arg_v4i32(<4 x i32> %vec, ptr %p) { +; MIPS64-LABEL: arg_v4i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sd $5, 8($6) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $4, 0($6) +; +; MIPS32-LABEL: arg_v4i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 16($sp) +; MIPS32-NEXT: sw $7, 12($1) +; MIPS32-NEXT: sw $6, 8($1) +; MIPS32-NEXT: sw $5, 4($1) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $4, 0($1) + store <4 x i32> %vec, ptr %p + ret void +} + +define <4 x i32> @ret_v4i32(ptr %p) { +; MIPS64-LABEL: ret_v4i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: ld $2, 0($4) +; MIPS64-NEXT: ld $3, 8($4) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS32-LABEL: ret_v4i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $3, 4($4) +; MIPS32-NEXT: lw $1, 8($4) +; MIPS32-NEXT: lw $5, 12($4) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: move $4, $1 + %v = load <4 x i32>, ptr %p + ret <4 x i32> %v +} + +define void @call_v4i32(ptr %p) nounwind { +; MIPS64-LABEL: call_v4i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; 
MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: ld $5, 8($4) +; MIPS64-NEXT: ld $4, 0($4) +; MIPS64-NEXT: jal arg_v4i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: jal ret_v4i32 +; MIPS64-NEXT: nop +; MIPS64-NEXT: sd $3, 8($16) +; MIPS64-NEXT: sd $2, 0($16) +; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-LABEL: call_v4i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $7, 12($4) +; MIPS32-NEXT: lw $6, 8($4) +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: jal arg_v4i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: jal ret_v4i32 +; MIPS32-NEXT: nop +; MIPS32-NEXT: sw $5, 12($16) +; MIPS32-NEXT: sw $4, 8($16) +; MIPS32-NEXT: sw $3, 4($16) +; MIPS32-NEXT: sw $2, 0($16) +; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 + %v1 = load <4 x i32>, ptr %p + call void @arg_v4i32(<4 x i32> %v1) + %v2 = call <4 x i32> @ret_v4i32() + store <4 x i32> %v2, ptr %p + ret void +} + +define void @arg_v5i32(<5 x i32> %vec, ptr %p) { +; MIPS64-LABEL: arg_v5i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $1, $zero, 1 +; MIPS64-NEXT: dsll $1, $1, 32 +; MIPS64-NEXT: daddiu $1, $1, -1 +; MIPS64-NEXT: and $2, $7, $1 +; MIPS64-NEXT: dsll $3, $6, 32 +; MIPS64-NEXT: or $2, $2, $3 +; MIPS64-NEXT: sw $8, 16($9) +; MIPS64-NEXT: sd $2, 8($9) +; MIPS64-NEXT: and $1, $5, $1 +; MIPS64-NEXT: dsll $2, $4, 32 +; MIPS64-NEXT: or $1, $1, $2 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($9) +; +; MIPS32-LABEL: arg_v5i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 20($sp) +; MIPS32-NEXT: lw $2, 16($sp) +; MIPS32-NEXT: sw $2, 16($1) +; MIPS32-NEXT: sw $7, 12($1) +; MIPS32-NEXT: sw $6, 8($1) +; MIPS32-NEXT: sw $5, 4($1) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $4, 0($1) + store <5 x i32> %vec, ptr %p + ret void +} + +define <5 x i32> @ret_v5i32(ptr %p) { +; MIPS64-LABEL: ret_v5i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lw $1, 16($5) +; MIPS64-NEXT: sw $1, 16($4) +; MIPS64-NEXT: ld $1, 8($5) +; MIPS64-NEXT: sd $1, 8($4) +; MIPS64-NEXT: ld $1, 0($5) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($4) +; +; MIPS32-LABEL: ret_v5i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 12($5) +; MIPS32-NEXT: lw $2, 16($5) +; MIPS32-NEXT: sw $2, 16($4) +; MIPS32-NEXT: sw $1, 12($4) +; MIPS32-NEXT: lw $1, 8($5) +; MIPS32-NEXT: sw $1, 8($4) +; MIPS32-NEXT: lw $1, 4($5) +; MIPS32-NEXT: sw $1, 4($4) +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 0($4) + %v = load <5 x i32>, ptr %p + ret <5 x i32> %v +} + +define void @call_v5i32(ptr %p) nounwind { +; MIPS64-LABEL: call_v5i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -64 +; MIPS64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $fp, 48($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 40($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $fp, $sp +; MIPS64-NEXT: daddiu $1, $zero, -32 +; MIPS64-NEXT: and $sp, $sp, $1 +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: lw $8, 16($4) +; MIPS64-NEXT: ld $7, 8($4) +; MIPS64-NEXT: ld $5, 0($4) +; MIPS64-NEXT: dsrl $4, $5, 32 +; MIPS64-NEXT: jal arg_v5i32 +; MIPS64-NEXT: dsrl $6, $7, 32 +; MIPS64-NEXT: 
jal ret_v5i32 +; MIPS64-NEXT: daddiu $4, $sp, 0 +; MIPS64-NEXT: lw $1, 16($sp) +; MIPS64-NEXT: ld $2, 0($sp) +; MIPS64-NEXT: sd $2, 0($16) +; MIPS64-NEXT: ld $2, 8($sp) +; MIPS64-NEXT: sd $2, 8($16) +; MIPS64-NEXT: sw $1, 16($16) +; MIPS64-NEXT: move $sp, $fp +; MIPS64-NEXT: ld $16, 40($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $fp, 48($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 64 +; +; MIPS32-LABEL: call_v5i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -96 +; MIPS32-NEXT: sw $ra, 92($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $fp, 88($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 84($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $fp, $sp +; MIPS32-NEXT: addiu $1, $zero, -32 +; MIPS32-NEXT: and $sp, $sp, $1 +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $7, 12($4) +; MIPS32-NEXT: lw $6, 8($4) +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: lw $1, 16($16) +; MIPS32-NEXT: jal arg_v5i32 +; MIPS32-NEXT: sw $1, 16($sp) +; MIPS32-NEXT: jal ret_v5i32 +; MIPS32-NEXT: addiu $4, $sp, 32 +; MIPS32-NEXT: lw $1, 36($sp) +; MIPS32-NEXT: lw $2, 32($sp) +; MIPS32-NEXT: sw $2, 0($16) +; MIPS32-NEXT: sw $1, 4($16) +; MIPS32-NEXT: lw $1, 40($sp) +; MIPS32-NEXT: sw $1, 8($16) +; MIPS32-NEXT: lw $1, 44($sp) +; MIPS32-NEXT: sw $1, 12($16) +; MIPS32-NEXT: lw $1, 48($sp) +; MIPS32-NEXT: sw $1, 16($16) +; MIPS32-NEXT: move $sp, $fp +; MIPS32-NEXT: lw $16, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $fp, 88($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 92($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 96 + %v1 = load <5 x i32>, ptr %p + call void @arg_v5i32(<5 x i32> %v1) + %v2 = call <5 x i32> @ret_v5i32() + store <5 x i32> %v2, ptr %p + ret void +} + +define void @arg_v8i32(<8 x i32> %vec, ptr %p) { +; MIPS64-LABEL: arg_v8i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sd $7, 24($8) +; MIPS64-NEXT: sd $6, 16($8) +; MIPS64-NEXT: sd $5, 8($8) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $4, 0($8) +; +; MIPS32-LABEL: arg_v8i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 16($sp) +; MIPS32-NEXT: lw $2, 20($sp) +; MIPS32-NEXT: lw $3, 24($sp) +; MIPS32-NEXT: lw $8, 32($sp) +; MIPS32-NEXT: lw $9, 28($sp) +; MIPS32-NEXT: sw $9, 28($8) +; MIPS32-NEXT: sw $3, 24($8) +; MIPS32-NEXT: sw $2, 20($8) +; MIPS32-NEXT: sw $1, 16($8) +; MIPS32-NEXT: sw $7, 12($8) +; MIPS32-NEXT: sw $6, 8($8) +; MIPS32-NEXT: sw $5, 4($8) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $4, 0($8) + store <8 x i32> %vec, ptr %p + ret void +} + +define <8 x i32> @ret_v8i32(ptr %p) { +; MIPS64-LABEL: ret_v8i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: ld $1, 24($5) +; MIPS64-NEXT: sd $1, 24($4) +; MIPS64-NEXT: ld $1, 16($5) +; MIPS64-NEXT: sd $1, 16($4) +; MIPS64-NEXT: ld $1, 8($5) +; MIPS64-NEXT: sd $1, 8($4) +; MIPS64-NEXT: ld $1, 0($5) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($4) +; +; MIPS32-LABEL: ret_v8i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 12($5) +; MIPS32-NEXT: lw $2, 16($5) +; MIPS32-NEXT: lw $3, 20($5) +; MIPS32-NEXT: lw $6, 24($5) +; MIPS32-NEXT: lw $7, 28($5) +; MIPS32-NEXT: sw $7, 28($4) +; MIPS32-NEXT: sw $6, 24($4) +; MIPS32-NEXT: sw $3, 20($4) +; MIPS32-NEXT: sw $2, 16($4) +; MIPS32-NEXT: sw $1, 12($4) +; MIPS32-NEXT: lw $1, 8($5) +; MIPS32-NEXT: sw $1, 8($4) +; MIPS32-NEXT: lw $1, 4($5) +; MIPS32-NEXT: sw $1, 4($4) +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 0($4) + %v = load <8 x i32>, ptr %p + ret <8 x i32> %v +} + +define 
void @call_v8i32(ptr %p) nounwind { +; MIPS64-LABEL: call_v8i32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -64 +; MIPS64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $fp, 48($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 40($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $fp, $sp +; MIPS64-NEXT: daddiu $1, $zero, -32 +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: ld $7, 24($4) +; MIPS64-NEXT: ld $6, 16($4) +; MIPS64-NEXT: ld $5, 8($4) +; MIPS64-NEXT: ld $4, 0($4) +; MIPS64-NEXT: jal arg_v8i32 +; MIPS64-NEXT: and $sp, $sp, $1 +; MIPS64-NEXT: jal ret_v8i32 +; MIPS64-NEXT: daddiu $4, $sp, 0 +; MIPS64-NEXT: ld $1, 0($sp) +; MIPS64-NEXT: lw $2, 16($sp) +; MIPS64-NEXT: lw $3, 20($sp) +; MIPS64-NEXT: lw $4, 24($sp) +; MIPS64-NEXT: lw $5, 28($sp) +; MIPS64-NEXT: sw $5, 28($16) +; MIPS64-NEXT: sw $4, 24($16) +; MIPS64-NEXT: sw $3, 20($16) +; MIPS64-NEXT: sw $2, 16($16) +; MIPS64-NEXT: lw $2, 12($sp) +; MIPS64-NEXT: sw $2, 12($16) +; MIPS64-NEXT: lw $2, 8($sp) +; MIPS64-NEXT: sw $2, 8($16) +; MIPS64-NEXT: sd $1, 0($16) +; MIPS64-NEXT: move $sp, $fp +; MIPS64-NEXT: ld $16, 40($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $fp, 48($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 64 +; +; MIPS32-LABEL: call_v8i32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -96 +; MIPS32-NEXT: sw $ra, 92($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $fp, 88($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 84($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $fp, $sp +; MIPS32-NEXT: addiu $1, $zero, -32 +; MIPS32-NEXT: and $sp, $sp, $1 +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $7, 12($4) +; MIPS32-NEXT: lw $6, 8($4) +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: lw $1, 16($16) +; MIPS32-NEXT: lw $2, 20($16) +; MIPS32-NEXT: lw $3, 24($16) +; MIPS32-NEXT: lw $8, 28($16) +; MIPS32-NEXT: sw $8, 28($sp) +; MIPS32-NEXT: sw $3, 24($sp) +; MIPS32-NEXT: sw $2, 20($sp) +; MIPS32-NEXT: jal arg_v8i32 +; MIPS32-NEXT: sw $1, 16($sp) +; MIPS32-NEXT: jal ret_v8i32 +; MIPS32-NEXT: addiu $4, $sp, 32 +; MIPS32-NEXT: lw $1, 44($sp) +; MIPS32-NEXT: lw $2, 48($sp) +; MIPS32-NEXT: lw $3, 52($sp) +; MIPS32-NEXT: lw $4, 56($sp) +; MIPS32-NEXT: lw $5, 60($sp) +; MIPS32-NEXT: sw $5, 28($16) +; MIPS32-NEXT: sw $4, 24($16) +; MIPS32-NEXT: sw $3, 20($16) +; MIPS32-NEXT: sw $2, 16($16) +; MIPS32-NEXT: sw $1, 12($16) +; MIPS32-NEXT: lw $1, 40($sp) +; MIPS32-NEXT: sw $1, 8($16) +; MIPS32-NEXT: lw $1, 36($sp) +; MIPS32-NEXT: sw $1, 4($16) +; MIPS32-NEXT: lw $1, 32($sp) +; MIPS32-NEXT: sw $1, 0($16) +; MIPS32-NEXT: move $sp, $fp +; MIPS32-NEXT: lw $16, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $fp, 88($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 92($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 96 + %v1 = load <8 x i32>, ptr %p + call void @arg_v8i32(<8 x i32> %v1) + %v2 = call <8 x i32> @ret_v8i32() + store <8 x i32> %v2, ptr %p + ret void +} + +define void @arg_v3i24(<3 x i24> %vec, ptr %p) { +; MIPS64-LABEL: arg_v3i24: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $6, 0 +; MIPS64-NEXT: srl $2, $1, 8 +; MIPS64-NEXT: sll $3, $4, 0 +; MIPS64-NEXT: sll $4, $5, 0 +; MIPS64-NEXT: sb $1, 8($7) +; MIPS64-NEXT: sb $4, 5($7) +; MIPS64-NEXT: sb $3, 2($7) +; MIPS64-NEXT: sh $2, 6($7) +; MIPS64-NEXT: srl $1, $4, 8 +; MIPS64-NEXT: sb $1, 4($7) +; MIPS64-NEXT: srl $1, $4, 16 +; MIPS64-NEXT: sb $1, 3($7) +; MIPS64-NEXT: srl $1, $3, 8 +; MIPS64-NEXT: jr $ra +; 
MIPS64-NEXT: sh $1, 0($7) +; +; MIPS32-LABEL: arg_v3i24: +; MIPS32: # %bb.0: +; MIPS32-NEXT: srl $1, $6, 8 +; MIPS32-NEXT: sb $6, 8($7) +; MIPS32-NEXT: sb $5, 5($7) +; MIPS32-NEXT: sb $4, 2($7) +; MIPS32-NEXT: sh $1, 6($7) +; MIPS32-NEXT: srl $1, $5, 8 +; MIPS32-NEXT: sb $1, 4($7) +; MIPS32-NEXT: srl $1, $5, 16 +; MIPS32-NEXT: sb $1, 3($7) +; MIPS32-NEXT: srl $1, $4, 8 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sh $1, 0($7) + store <3 x i24> %vec, ptr %p + ret void +} + +define <3 x i24> @ret_v3i24(ptr %p) { +; MIPS64-LABEL: ret_v3i24: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lbu $1, 8($4) +; MIPS64-NEXT: lh $2, 6($4) +; MIPS64-NEXT: sll $3, $2, 8 +; MIPS64-NEXT: lbu $2, 2($4) +; MIPS64-NEXT: lhu $5, 0($4) +; MIPS64-NEXT: sll $5, $5, 8 +; MIPS64-NEXT: or $2, $2, $5 +; MIPS64-NEXT: or $1, $1, $3 +; MIPS64-NEXT: lbu $3, 4($4) +; MIPS64-NEXT: sll $3, $3, 8 +; MIPS64-NEXT: lb $5, 3($4) +; MIPS64-NEXT: sll $5, $5, 16 +; MIPS64-NEXT: or $3, $5, $3 +; MIPS64-NEXT: lbu $4, 5($4) +; MIPS64-NEXT: or $3, $4, $3 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: move $4, $1 +; +; MIPS32-LABEL: ret_v3i24: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lbu $1, 8($4) +; MIPS32-NEXT: lh $2, 6($4) +; MIPS32-NEXT: sll $3, $2, 8 +; MIPS32-NEXT: lbu $2, 2($4) +; MIPS32-NEXT: lhu $5, 0($4) +; MIPS32-NEXT: sll $5, $5, 8 +; MIPS32-NEXT: or $2, $2, $5 +; MIPS32-NEXT: or $1, $1, $3 +; MIPS32-NEXT: lbu $3, 4($4) +; MIPS32-NEXT: sll $3, $3, 8 +; MIPS32-NEXT: lb $5, 3($4) +; MIPS32-NEXT: sll $5, $5, 16 +; MIPS32-NEXT: or $3, $5, $3 +; MIPS32-NEXT: lbu $4, 5($4) +; MIPS32-NEXT: or $3, $4, $3 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: move $4, $1 + %v = load <3 x i24>, ptr %p + ret <3 x i24> %v +} + +define void @call_v3i24(ptr %p) nounwind { +; MIPS64-LABEL: call_v3i24: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: lbu $1, 4($4) +; MIPS64-NEXT: lbu $2, 8($4) +; MIPS64-NEXT: lh $3, 6($4) +; MIPS64-NEXT: dsll $3, $3, 8 +; MIPS64-NEXT: lbu $4, 2($4) +; MIPS64-NEXT: lh $5, 0($16) +; MIPS64-NEXT: dsll $5, $5, 8 +; MIPS64-NEXT: or $4, $4, $5 +; MIPS64-NEXT: or $6, $2, $3 +; MIPS64-NEXT: dsll $1, $1, 8 +; MIPS64-NEXT: lb $2, 3($16) +; MIPS64-NEXT: dsll $2, $2, 16 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: lbu $2, 5($16) +; MIPS64-NEXT: jal arg_v3i24 +; MIPS64-NEXT: or $5, $2, $1 +; MIPS64-NEXT: jal ret_v3i24 +; MIPS64-NEXT: nop +; MIPS64-NEXT: srl $1, $4, 8 +; MIPS64-NEXT: sb $4, 8($16) +; MIPS64-NEXT: sb $3, 5($16) +; MIPS64-NEXT: sb $2, 2($16) +; MIPS64-NEXT: sh $1, 6($16) +; MIPS64-NEXT: srl $1, $3, 8 +; MIPS64-NEXT: sb $1, 4($16) +; MIPS64-NEXT: srl $1, $3, 16 +; MIPS64-NEXT: sb $1, 3($16) +; MIPS64-NEXT: srl $1, $2, 8 +; MIPS64-NEXT: sh $1, 0($16) +; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-LABEL: call_v3i24: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lbu $1, 4($4) +; MIPS32-NEXT: lbu $2, 8($4) +; MIPS32-NEXT: lh $3, 6($4) +; MIPS32-NEXT: sll $3, $3, 8 +; MIPS32-NEXT: lbu $4, 2($4) +; MIPS32-NEXT: lhu $5, 0($16) +; MIPS32-NEXT: sll $5, $5, 8 +; MIPS32-NEXT: or $4, $4, $5 +; MIPS32-NEXT: or $6, $2, $3 +; MIPS32-NEXT: sll $1, $1, 8 +; MIPS32-NEXT: lb $2, 3($16) +; MIPS32-NEXT: sll $2, $2, 16 +; 
MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: lbu $2, 5($16) +; MIPS32-NEXT: jal arg_v3i24 +; MIPS32-NEXT: or $5, $2, $1 +; MIPS32-NEXT: jal ret_v3i24 +; MIPS32-NEXT: nop +; MIPS32-NEXT: srl $1, $4, 8 +; MIPS32-NEXT: sb $4, 8($16) +; MIPS32-NEXT: sb $3, 5($16) +; MIPS32-NEXT: sb $2, 2($16) +; MIPS32-NEXT: sh $1, 6($16) +; MIPS32-NEXT: srl $1, $3, 8 +; MIPS32-NEXT: sb $1, 4($16) +; MIPS32-NEXT: srl $1, $3, 16 +; MIPS32-NEXT: sb $1, 3($16) +; MIPS32-NEXT: srl $1, $2, 8 +; MIPS32-NEXT: sh $1, 0($16) +; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 + %v1 = load <3 x i24>, ptr %p + call void @arg_v3i24(<3 x i24> %v1) + %v2 = call <3 x i24> @ret_v3i24() + store <3 x i24> %v2, ptr %p + ret void +} + +define void @arg_v4i24(<4 x i24> %vec, ptr %p) { +; MIPS64-LABEL: arg_v4i24: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $7, 0 +; MIPS64-NEXT: sll $2, $6, 0 +; MIPS64-NEXT: srl $3, $2, 8 +; MIPS64-NEXT: srl $6, $1, 16 +; MIPS64-NEXT: srl $7, $1, 8 +; MIPS64-NEXT: sll $4, $4, 0 +; MIPS64-NEXT: sll $5, $5, 0 +; MIPS64-NEXT: sb $1, 11($8) +; MIPS64-NEXT: sb $2, 8($8) +; MIPS64-NEXT: sb $5, 5($8) +; MIPS64-NEXT: sb $4, 2($8) +; MIPS64-NEXT: sb $7, 10($8) +; MIPS64-NEXT: sb $6, 9($8) +; MIPS64-NEXT: sh $3, 6($8) +; MIPS64-NEXT: srl $1, $5, 8 +; MIPS64-NEXT: sb $1, 4($8) +; MIPS64-NEXT: srl $1, $5, 16 +; MIPS64-NEXT: sb $1, 3($8) +; MIPS64-NEXT: srl $1, $4, 8 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sh $1, 0($8) +; +; MIPS32-LABEL: arg_v4i24: +; MIPS32: # %bb.0: +; MIPS32-NEXT: srl $1, $6, 8 +; MIPS32-NEXT: srl $2, $7, 16 +; MIPS32-NEXT: srl $3, $7, 8 +; MIPS32-NEXT: lw $8, 16($sp) +; MIPS32-NEXT: sb $7, 11($8) +; MIPS32-NEXT: sb $6, 8($8) +; MIPS32-NEXT: sb $5, 5($8) +; MIPS32-NEXT: sb $4, 2($8) +; MIPS32-NEXT: sb $3, 10($8) +; MIPS32-NEXT: sb $2, 9($8) +; MIPS32-NEXT: sh $1, 6($8) +; MIPS32-NEXT: srl $1, $5, 8 +; MIPS32-NEXT: sb $1, 4($8) +; MIPS32-NEXT: srl $1, $5, 16 +; MIPS32-NEXT: sb $1, 3($8) +; MIPS32-NEXT: srl $1, $4, 8 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sh $1, 0($8) + store <4 x i24> %vec, ptr %p + ret void +} + +define <4 x i24> @ret_v4i24(ptr %p) { +; MIPS64-LABEL: ret_v4i24: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lbu $1, 4($4) +; MIPS64-NEXT: sll $1, $1, 8 +; MIPS64-NEXT: lbu $2, 3($4) +; MIPS64-NEXT: sll $2, $2, 16 +; MIPS64-NEXT: or $3, $2, $1 +; MIPS64-NEXT: lbu $5, 5($4) +; MIPS64-NEXT: lbu $1, 8($4) +; MIPS64-NEXT: lhu $2, 6($4) +; MIPS64-NEXT: sll $6, $2, 8 +; MIPS64-NEXT: lbu $2, 2($4) +; MIPS64-NEXT: lhu $7, 0($4) +; MIPS64-NEXT: sll $7, $7, 8 +; MIPS64-NEXT: or $2, $2, $7 +; MIPS64-NEXT: or $1, $1, $6 +; MIPS64-NEXT: or $3, $5, $3 +; MIPS64-NEXT: lbu $5, 10($4) +; MIPS64-NEXT: sll $5, $5, 8 +; MIPS64-NEXT: lbu $6, 9($4) +; MIPS64-NEXT: sll $6, $6, 16 +; MIPS64-NEXT: or $5, $6, $5 +; MIPS64-NEXT: lbu $4, 11($4) +; MIPS64-NEXT: or $5, $4, $5 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: move $4, $1 +; +; MIPS32-LABEL: ret_v4i24: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lbu $1, 4($4) +; MIPS32-NEXT: sll $1, $1, 8 +; MIPS32-NEXT: lbu $2, 3($4) +; MIPS32-NEXT: sll $2, $2, 16 +; MIPS32-NEXT: or $3, $2, $1 +; MIPS32-NEXT: lbu $5, 5($4) +; MIPS32-NEXT: lbu $1, 8($4) +; MIPS32-NEXT: lhu $2, 6($4) +; MIPS32-NEXT: sll $6, $2, 8 +; MIPS32-NEXT: lbu $2, 2($4) +; MIPS32-NEXT: lhu $7, 0($4) +; MIPS32-NEXT: sll $7, $7, 8 +; MIPS32-NEXT: or $2, $2, $7 +; MIPS32-NEXT: or $1, $1, $6 +; MIPS32-NEXT: or $3, $5, $3 +; MIPS32-NEXT: lbu $5, 10($4) +; MIPS32-NEXT: sll $5, $5, 8 +; MIPS32-NEXT: lbu $6, 9($4) 
+; MIPS32-NEXT: sll $6, $6, 16 +; MIPS32-NEXT: or $5, $6, $5 +; MIPS32-NEXT: lbu $4, 11($4) +; MIPS32-NEXT: or $5, $4, $5 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: move $4, $1 + %v = load <4 x i24>, ptr %p + ret <4 x i24> %v +} + +define void @call_v4i24(ptr %p) nounwind { +; MIPS64-LABEL: call_v4i24: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: lbu $1, 4($4) +; MIPS64-NEXT: dsll $1, $1, 8 +; MIPS64-NEXT: lb $2, 3($4) +; MIPS64-NEXT: dsll $2, $2, 16 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: lbu $2, 10($4) +; MIPS64-NEXT: lbu $3, 5($4) +; MIPS64-NEXT: lbu $5, 8($4) +; MIPS64-NEXT: lh $4, 6($4) +; MIPS64-NEXT: dsll $6, $4, 8 +; MIPS64-NEXT: lbu $4, 2($16) +; MIPS64-NEXT: lh $7, 0($16) +; MIPS64-NEXT: dsll $7, $7, 8 +; MIPS64-NEXT: or $4, $4, $7 +; MIPS64-NEXT: or $6, $5, $6 +; MIPS64-NEXT: or $5, $3, $1 +; MIPS64-NEXT: dsll $1, $2, 8 +; MIPS64-NEXT: lb $2, 9($16) +; MIPS64-NEXT: dsll $2, $2, 16 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: lbu $2, 11($16) +; MIPS64-NEXT: jal arg_v4i24 +; MIPS64-NEXT: or $7, $2, $1 +; MIPS64-NEXT: jal ret_v4i24 +; MIPS64-NEXT: nop +; MIPS64-NEXT: srl $1, $4, 8 +; MIPS64-NEXT: srl $6, $5, 16 +; MIPS64-NEXT: srl $7, $5, 8 +; MIPS64-NEXT: sb $5, 11($16) +; MIPS64-NEXT: sb $4, 8($16) +; MIPS64-NEXT: sb $3, 5($16) +; MIPS64-NEXT: sb $2, 2($16) +; MIPS64-NEXT: sb $7, 10($16) +; MIPS64-NEXT: sb $6, 9($16) +; MIPS64-NEXT: sh $1, 6($16) +; MIPS64-NEXT: srl $1, $3, 8 +; MIPS64-NEXT: sb $1, 4($16) +; MIPS64-NEXT: srl $1, $3, 16 +; MIPS64-NEXT: sb $1, 3($16) +; MIPS64-NEXT: srl $1, $2, 8 +; MIPS64-NEXT: sh $1, 0($16) +; MIPS64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-LABEL: call_v4i24: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lbu $1, 4($4) +; MIPS32-NEXT: sll $1, $1, 8 +; MIPS32-NEXT: lbu $2, 3($4) +; MIPS32-NEXT: sll $2, $2, 16 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: lbu $2, 10($4) +; MIPS32-NEXT: lbu $3, 5($4) +; MIPS32-NEXT: lbu $5, 8($4) +; MIPS32-NEXT: lhu $4, 6($4) +; MIPS32-NEXT: sll $6, $4, 8 +; MIPS32-NEXT: lbu $4, 2($16) +; MIPS32-NEXT: lhu $7, 0($16) +; MIPS32-NEXT: sll $7, $7, 8 +; MIPS32-NEXT: or $4, $4, $7 +; MIPS32-NEXT: or $6, $5, $6 +; MIPS32-NEXT: or $5, $3, $1 +; MIPS32-NEXT: sll $1, $2, 8 +; MIPS32-NEXT: lbu $2, 9($16) +; MIPS32-NEXT: sll $2, $2, 16 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: lbu $2, 11($16) +; MIPS32-NEXT: jal arg_v4i24 +; MIPS32-NEXT: or $7, $2, $1 +; MIPS32-NEXT: jal ret_v4i24 +; MIPS32-NEXT: nop +; MIPS32-NEXT: srl $1, $4, 8 +; MIPS32-NEXT: srl $6, $5, 16 +; MIPS32-NEXT: srl $7, $5, 8 +; MIPS32-NEXT: sb $5, 11($16) +; MIPS32-NEXT: sb $4, 8($16) +; MIPS32-NEXT: sb $3, 5($16) +; MIPS32-NEXT: sb $2, 2($16) +; MIPS32-NEXT: sb $7, 10($16) +; MIPS32-NEXT: sb $6, 9($16) +; MIPS32-NEXT: sh $1, 6($16) +; MIPS32-NEXT: srl $1, $3, 8 +; MIPS32-NEXT: sb $1, 4($16) +; MIPS32-NEXT: srl $1, $3, 16 +; MIPS32-NEXT: sb $1, 3($16) +; MIPS32-NEXT: srl $1, $2, 8 +; MIPS32-NEXT: sh $1, 0($16) +; MIPS32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 + %v1 = load <4 x i24>, ptr %p + 
call void @arg_v4i24(<4 x i24> %v1) + %v2 = call <4 x i24> @ret_v4i24() + store <4 x i24> %v2, ptr %p + ret void +} + +define void @arg_v4i18(<4 x i18> %vec, ptr %p) { +; MIPS64-LABEL: arg_v4i18: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lui $1, 3 +; MIPS64-NEXT: ori $2, $1, 65535 +; MIPS64-NEXT: and $3, $5, $2 +; MIPS64-NEXT: dsll $3, $3, 36 +; MIPS64-NEXT: dsll $5, $4, 54 +; MIPS64-NEXT: or $3, $5, $3 +; MIPS64-NEXT: and $2, $6, $2 +; MIPS64-NEXT: dsll $2, $2, 18 +; MIPS64-NEXT: or $2, $3, $2 +; MIPS64-NEXT: ori $1, $1, 65280 +; MIPS64-NEXT: and $1, $7, $1 +; MIPS64-NEXT: sb $7, 8($8) +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: daddiu $2, $zero, 255 +; MIPS64-NEXT: dsrl $1, $1, 8 +; MIPS64-NEXT: dsll $2, $2, 56 +; MIPS64-NEXT: dsll $3, $4, 46 +; MIPS64-NEXT: and $2, $3, $2 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($8) +; +; MIPS32-LABEL: arg_v4i18: +; MIPS32: # %bb.0: +; MIPS32-NEXT: sll $1, $4, 14 +; MIPS32-NEXT: lui $2, 63 +; MIPS32-NEXT: lui $3, 65280 +; MIPS32-NEXT: and $1, $1, $3 +; MIPS32-NEXT: ori $2, $2, 65280 +; MIPS32-NEXT: sll $3, $5, 4 +; MIPS32-NEXT: and $2, $3, $2 +; MIPS32-NEXT: sll $4, $4, 22 +; MIPS32-NEXT: or $2, $4, $2 +; MIPS32-NEXT: srl $2, $2, 8 +; MIPS32-NEXT: lui $4, 3 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: ori $2, $4, 65280 +; MIPS32-NEXT: and $2, $7, $2 +; MIPS32-NEXT: sll $5, $6, 18 +; MIPS32-NEXT: or $2, $5, $2 +; MIPS32-NEXT: lw $5, 16($sp) +; MIPS32-NEXT: sb $7, 8($5) +; MIPS32-NEXT: sw $1, 0($5) +; MIPS32-NEXT: srl $1, $2, 8 +; MIPS32-NEXT: ori $2, $4, 49152 +; MIPS32-NEXT: and $2, $6, $2 +; MIPS32-NEXT: srl $2, $2, 14 +; MIPS32-NEXT: or $2, $3, $2 +; MIPS32-NEXT: sll $2, $2, 24 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 4($5) + store <4 x i18> %vec, ptr %p + ret void +} + +define <4 x i18> @ret_v4i18(ptr %p) { +; MIPS64-LABEL: ret_v4i18: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lbu $1, 8($4) +; MIPS64-NEXT: ld $2, 0($4) +; MIPS64-NEXT: dsll $6, $2, 8 +; MIPS64-NEXT: or $1, $1, $6 +; MIPS64-NEXT: sll $3, $2, 0 +; MIPS64-NEXT: sll $1, $1, 0 +; MIPS64-NEXT: srl $4, $3, 10 +; MIPS64-NEXT: lui $3, 3 +; MIPS64-NEXT: ori $5, $3, 65535 +; MIPS64-NEXT: dsrl $3, $2, 28 +; MIPS64-NEXT: sll $3, $3, 0 +; MIPS64-NEXT: lui $7, 3 +; MIPS64-NEXT: and $3, $3, $5 +; MIPS64-NEXT: and $4, $4, $5 +; MIPS64-NEXT: and $5, $1, $5 +; MIPS64-NEXT: ori $1, $7, 64512 +; MIPS64-NEXT: dsrl $2, $2, 46 +; MIPS64-NEXT: and $1, $2, $1 +; MIPS64-NEXT: dsrl $2, $6, 54 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sll $2, $1, 0 +; +; MIPS32-LABEL: ret_v4i18: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lbu $1, 8($4) +; MIPS32-NEXT: lw $2, 4($4) +; MIPS32-NEXT: sll $6, $2, 8 +; MIPS32-NEXT: lui $3, 3 +; MIPS32-NEXT: or $1, $1, $6 +; MIPS32-NEXT: ori $5, $3, 64512 +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: srl $7, $4, 14 +; MIPS32-NEXT: and $5, $7, $5 +; MIPS32-NEXT: srl $7, $2, 24 +; MIPS32-NEXT: ori $8, $3, 65535 +; MIPS32-NEXT: sll $3, $4, 8 +; MIPS32-NEXT: srl $2, $3, 22 +; MIPS32-NEXT: or $2, $2, $5 +; MIPS32-NEXT: and $5, $1, $8 +; MIPS32-NEXT: or $1, $3, $7 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: and $3, $1, $8 +; MIPS32-NEXT: sll $1, $7, 14 +; MIPS32-NEXT: srl $4, $6, 18 +; MIPS32-NEXT: or $1, $4, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: and $4, $1, $8 + %v = load <4 x i18>, ptr %p + ret <4 x i18> %v +} + +define void @call_v4i18(ptr %p) nounwind { +; MIPS64-LABEL: call_v4i18: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -48 +; MIPS64-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill +; 
MIPS64-NEXT: sd $19, 32($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $18, 24($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $17, 16($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: lui $17, 3 +; MIPS64-NEXT: ori $1, $17, 64512 +; MIPS64-NEXT: ld $2, 0($4) +; MIPS64-NEXT: dsrl $3, $2, 46 +; MIPS64-NEXT: dsrl $4, $2, 10 +; MIPS64-NEXT: ori $18, $17, 65535 +; MIPS64-NEXT: dsrl $5, $2, 28 +; MIPS64-NEXT: and $5, $5, $18 +; MIPS64-NEXT: and $6, $4, $18 +; MIPS64-NEXT: and $1, $3, $1 +; MIPS64-NEXT: dsll $2, $2, 8 +; MIPS64-NEXT: dsrl $3, $2, 54 +; MIPS64-NEXT: or $4, $3, $1 +; MIPS64-NEXT: lbu $1, 8($16) +; MIPS64-NEXT: or $1, $1, $2 +; MIPS64-NEXT: jal arg_v4i18 +; MIPS64-NEXT: and $7, $1, $18 +; MIPS64-NEXT: daddiu $1, $zero, 255 +; MIPS64-NEXT: dsll $19, $1, 56 +; MIPS64-NEXT: jal ret_v4i18 +; MIPS64-NEXT: ori $17, $17, 65280 +; MIPS64-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; MIPS64-NEXT: # kill: def $v1 killed $v1 def $v1_64 +; MIPS64-NEXT: # kill: def $a0 killed $a0 def $a0_64 +; MIPS64-NEXT: # kill: def $a1 killed $a1 def $a1_64 +; MIPS64-NEXT: dsll $1, $2, 54 +; MIPS64-NEXT: and $3, $3, $18 +; MIPS64-NEXT: dsll $3, $3, 36 +; MIPS64-NEXT: or $1, $1, $3 +; MIPS64-NEXT: and $3, $4, $18 +; MIPS64-NEXT: dsll $3, $3, 18 +; MIPS64-NEXT: sb $5, 8($16) +; MIPS64-NEXT: or $1, $1, $3 +; MIPS64-NEXT: and $3, $5, $17 +; MIPS64-NEXT: or $1, $1, $3 +; MIPS64-NEXT: dsrl $1, $1, 8 +; MIPS64-NEXT: dsll $2, $2, 46 +; MIPS64-NEXT: and $2, $2, $19 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: sd $1, 0($16) +; MIPS64-NEXT: ld $16, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $17, 16($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $18, 24($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $19, 32($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 48 +; +; MIPS32-LABEL: call_v4i18: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -40 +; MIPS32-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $1, 4($4) +; MIPS32-NEXT: srl $2, $1, 24 +; MIPS32-NEXT: lw $3, 0($4) +; MIPS32-NEXT: sll $4, $3, 8 +; MIPS32-NEXT: or $5, $4, $2 +; MIPS32-NEXT: lbu $6, 8($16) +; MIPS32-NEXT: sll $1, $1, 8 +; MIPS32-NEXT: srl $5, $5, 4 +; MIPS32-NEXT: or $6, $6, $1 +; MIPS32-NEXT: lui $17, 3 +; MIPS32-NEXT: ori $7, $17, 64512 +; MIPS32-NEXT: srl $3, $3, 14 +; MIPS32-NEXT: and $3, $3, $7 +; MIPS32-NEXT: ori $8, $17, 65535 +; MIPS32-NEXT: srl $4, $4, 22 +; MIPS32-NEXT: or $4, $4, $3 +; MIPS32-NEXT: and $7, $6, $8 +; MIPS32-NEXT: and $5, $5, $8 +; MIPS32-NEXT: sll $2, $2, 14 +; MIPS32-NEXT: srl $1, $1, 18 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: jal arg_v4i18 +; MIPS32-NEXT: and $6, $1, $8 +; MIPS32-NEXT: ori $18, $17, 49152 +; MIPS32-NEXT: ori $17, $17, 65280 +; MIPS32-NEXT: lui $1, 63 +; MIPS32-NEXT: jal ret_v4i18 +; MIPS32-NEXT: ori $19, $1, 65280 +; MIPS32-NEXT: lui $1, 65280 +; MIPS32-NEXT: and $6, $5, $17 +; MIPS32-NEXT: sll $7, $4, 18 +; MIPS32-NEXT: or $6, $7, $6 +; MIPS32-NEXT: srl $6, $6, 8 +; MIPS32-NEXT: and $4, $4, $18 +; MIPS32-NEXT: srl $4, $4, 14 +; MIPS32-NEXT: sll $3, $3, 4 +; MIPS32-NEXT: or $4, $3, $4 +; MIPS32-NEXT: sll $4, $4, 24 +; MIPS32-NEXT: or $4, $6, $4 +; MIPS32-NEXT: sll $6, $2, 14 +; 
MIPS32-NEXT: sb $5, 8($16) +; MIPS32-NEXT: sw $4, 4($16) +; MIPS32-NEXT: and $1, $6, $1 +; MIPS32-NEXT: and $3, $3, $19 +; MIPS32-NEXT: sll $2, $2, 22 +; MIPS32-NEXT: or $2, $2, $3 +; MIPS32-NEXT: srl $2, $2, 8 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: sw $1, 0($16) +; MIPS32-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 40 + %v1 = load <4 x i18>, ptr %p + call void @arg_v4i18(<4 x i18> %v1) + %v2 = call <4 x i18> @ret_v4i18() + store <4 x i18> %v2, ptr %p + ret void +} + +define void @arg_v7i18(<7 x i18> %vec, ptr %p) { +; MIPS64-LABEL: arg_v7i18: +; MIPS64: # %bb.0: +; MIPS64-NEXT: lui $1, 3 +; MIPS64-NEXT: ori $2, $1, 65535 +; MIPS64-NEXT: and $3, $8, $2 +; MIPS64-NEXT: dsll $3, $3, 36 +; MIPS64-NEXT: dsll $8, $7, 54 +; MIPS64-NEXT: or $3, $8, $3 +; MIPS64-NEXT: and $8, $9, $2 +; MIPS64-NEXT: dsll $8, $8, 18 +; MIPS64-NEXT: or $3, $3, $8 +; MIPS64-NEXT: and $5, $5, $2 +; MIPS64-NEXT: and $8, $10, $2 +; MIPS64-NEXT: or $3, $3, $8 +; MIPS64-NEXT: dsll $5, $5, 26 +; MIPS64-NEXT: dsll $4, $4, 44 +; MIPS64-NEXT: or $4, $4, $5 +; MIPS64-NEXT: and $2, $6, $2 +; MIPS64-NEXT: dsll $2, $2, 8 +; MIPS64-NEXT: sd $3, 8($11) +; MIPS64-NEXT: or $2, $4, $2 +; MIPS64-NEXT: ori $1, $1, 64512 +; MIPS64-NEXT: and $1, $7, $1 +; MIPS64-NEXT: dsrl $1, $1, 10 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: dsll $2, $2, 62 +; MIPS64-NEXT: daddiu $2, $2, -1 +; MIPS64-NEXT: and $1, $1, $2 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($11) +; +; MIPS32-LABEL: arg_v7i18: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lui $1, 3 +; MIPS32-NEXT: ori $2, $1, 65535 +; MIPS32-NEXT: and $3, $6, $2 +; MIPS32-NEXT: sll $3, $3, 8 +; MIPS32-NEXT: ori $6, $1, 65472 +; MIPS32-NEXT: and $6, $5, $6 +; MIPS32-NEXT: srl $6, $6, 6 +; MIPS32-NEXT: sll $5, $5, 26 +; MIPS32-NEXT: sll $4, $4, 12 +; MIPS32-NEXT: or $4, $4, $6 +; MIPS32-NEXT: or $3, $5, $3 +; MIPS32-NEXT: ori $5, $1, 64512 +; MIPS32-NEXT: and $5, $7, $5 +; MIPS32-NEXT: srl $5, $5, 10 +; MIPS32-NEXT: lui $6, 16383 +; MIPS32-NEXT: ori $6, $6, 65535 +; MIPS32-NEXT: lw $8, 24($sp) +; MIPS32-NEXT: lw $9, 16($sp) +; MIPS32-NEXT: or $3, $3, $5 +; MIPS32-NEXT: and $5, $9, $2 +; MIPS32-NEXT: and $4, $4, $6 +; MIPS32-NEXT: and $2, $8, $2 +; MIPS32-NEXT: lw $6, 20($sp) +; MIPS32-NEXT: sll $8, $6, 18 +; MIPS32-NEXT: or $2, $8, $2 +; MIPS32-NEXT: lw $8, 28($sp) +; MIPS32-NEXT: sw $2, 12($8) +; MIPS32-NEXT: sw $4, 0($8) +; MIPS32-NEXT: sw $3, 4($8) +; MIPS32-NEXT: sll $2, $5, 4 +; MIPS32-NEXT: sll $3, $7, 22 +; MIPS32-NEXT: or $2, $3, $2 +; MIPS32-NEXT: ori $1, $1, 49152 +; MIPS32-NEXT: and $1, $6, $1 +; MIPS32-NEXT: srl $1, $1, 14 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 8($8) + store <7 x i18> %vec, ptr %p + ret void +} + +define <7 x i18> @ret_v7i18(ptr %p) { +; MIPS64-LABEL: ret_v7i18: +; MIPS64: # %bb.0: +; MIPS64-NEXT: ld $1, 0($5) +; MIPS64-NEXT: sd $1, 0($4) +; MIPS64-NEXT: ld $1, 8($5) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 8($4) +; +; MIPS32-LABEL: ret_v7i18: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: sw $1, 0($4) +; MIPS32-NEXT: lw $1, 4($5) +; MIPS32-NEXT: sw $1, 4($4) +; MIPS32-NEXT: lw $1, 12($5) +; MIPS32-NEXT: sw $1, 12($4) +; MIPS32-NEXT: lw $1, 8($5) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 8($4) + %v = 
load <7 x i18>, ptr %p + ret <7 x i18> %v +} + +define void @call_v7i18(ptr %p) nounwind { +; MIPS64-LABEL: call_v7i18: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -32 +; MIPS64-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 16($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: ld $1, 0($4) +; MIPS64-NEXT: ld $2, 8($4) +; MIPS64-NEXT: dsrl $3, $2, 18 +; MIPS64-NEXT: dsrl $4, $2, 36 +; MIPS64-NEXT: dsrl $6, $1, 8 +; MIPS64-NEXT: dsrl $5, $1, 26 +; MIPS64-NEXT: lui $7, 3 +; MIPS64-NEXT: ori $7, $7, 65535 +; MIPS64-NEXT: and $10, $2, $7 +; MIPS64-NEXT: and $5, $5, $7 +; MIPS64-NEXT: and $6, $6, $7 +; MIPS64-NEXT: and $8, $4, $7 +; MIPS64-NEXT: and $9, $3, $7 +; MIPS64-NEXT: dsll $3, $1, 10 +; MIPS64-NEXT: dsrl $2, $2, 54 +; MIPS64-NEXT: or $2, $2, $3 +; MIPS64-NEXT: and $7, $2, $7 +; MIPS64-NEXT: jal arg_v7i18 +; MIPS64-NEXT: dsrl $4, $1, 44 +; MIPS64-NEXT: jal ret_v7i18 +; MIPS64-NEXT: daddiu $4, $sp, 0 +; MIPS64-NEXT: ld $1, 0($sp) +; MIPS64-NEXT: sd $1, 0($16) +; MIPS64-NEXT: ld $1, 8($sp) +; MIPS64-NEXT: sd $1, 8($16) +; MIPS64-NEXT: ld $16, 16($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 32 +; +; MIPS32-LABEL: call_v7i18: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -64 +; MIPS32-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $fp, $sp +; MIPS32-NEXT: addiu $1, $zero, -16 +; MIPS32-NEXT: and $sp, $sp, $1 +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $1, 8($4) +; MIPS32-NEXT: sll $2, $1, 14 +; MIPS32-NEXT: lw $3, 12($4) +; MIPS32-NEXT: srl $4, $3, 18 +; MIPS32-NEXT: or $2, $4, $2 +; MIPS32-NEXT: srl $4, $1, 4 +; MIPS32-NEXT: lui $5, 3 +; MIPS32-NEXT: ori $7, $5, 65535 +; MIPS32-NEXT: and $2, $2, $7 +; MIPS32-NEXT: and $4, $4, $7 +; MIPS32-NEXT: and $3, $3, $7 +; MIPS32-NEXT: lw $8, 4($16) +; MIPS32-NEXT: lw $9, 0($16) +; MIPS32-NEXT: sll $5, $9, 6 +; MIPS32-NEXT: srl $6, $8, 26 +; MIPS32-NEXT: sw $3, 24($sp) +; MIPS32-NEXT: sw $4, 16($sp) +; MIPS32-NEXT: sw $2, 20($sp) +; MIPS32-NEXT: or $2, $6, $5 +; MIPS32-NEXT: srl $3, $8, 8 +; MIPS32-NEXT: and $6, $3, $7 +; MIPS32-NEXT: and $5, $2, $7 +; MIPS32-NEXT: sll $2, $8, 10 +; MIPS32-NEXT: srl $1, $1, 22 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: and $7, $1, $7 +; MIPS32-NEXT: jal arg_v7i18 +; MIPS32-NEXT: srl $4, $9, 12 +; MIPS32-NEXT: jal ret_v7i18 +; MIPS32-NEXT: addiu $4, $sp, 32 +; MIPS32-NEXT: lw $1, 32($sp) +; MIPS32-NEXT: sw $1, 0($16) +; MIPS32-NEXT: lw $1, 36($sp) +; MIPS32-NEXT: sw $1, 4($16) +; MIPS32-NEXT: lw $1, 40($sp) +; MIPS32-NEXT: sw $1, 8($16) +; MIPS32-NEXT: lw $1, 44($sp) +; MIPS32-NEXT: sw $1, 12($16) +; MIPS32-NEXT: move $sp, $fp +; MIPS32-NEXT: lw $16, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 64 + %v1 = load <7 x i18>, ptr %p + call void @arg_v7i18(<7 x i18> %v1) + %v2 = call <7 x i18> @ret_v7i18() + store <7 x i18> %v2, ptr %p + ret void +} + +define void @arg_v2i128(<2 x i128> %vec, ptr %p) { +; MIPS64-LABEL: arg_v2i128: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sd $7, 24($8) +; MIPS64-NEXT: sd $6, 16($8) +; MIPS64-NEXT: sd $5, 8($8) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $4, 0($8) +; +; MIPS32-LABEL: arg_v2i128: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 16($sp) +; MIPS32-NEXT: lw $2, 
20($sp) +; MIPS32-NEXT: lw $3, 24($sp) +; MIPS32-NEXT: lw $8, 32($sp) +; MIPS32-NEXT: lw $9, 28($sp) +; MIPS32-NEXT: sw $9, 28($8) +; MIPS32-NEXT: sw $3, 24($8) +; MIPS32-NEXT: sw $2, 20($8) +; MIPS32-NEXT: sw $1, 16($8) +; MIPS32-NEXT: sw $7, 12($8) +; MIPS32-NEXT: sw $6, 8($8) +; MIPS32-NEXT: sw $5, 4($8) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $4, 0($8) + store <2 x i128> %vec, ptr %p + ret void +} + +define <2 x i128> @ret_v2i128(ptr %p) { +; MIPS64-LABEL: ret_v2i128: +; MIPS64: # %bb.0: +; MIPS64-NEXT: ld $1, 24($5) +; MIPS64-NEXT: sd $1, 24($4) +; MIPS64-NEXT: ld $1, 16($5) +; MIPS64-NEXT: sd $1, 16($4) +; MIPS64-NEXT: ld $1, 8($5) +; MIPS64-NEXT: sd $1, 8($4) +; MIPS64-NEXT: ld $1, 0($5) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($4) +; +; MIPS32-LABEL: ret_v2i128: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 12($5) +; MIPS32-NEXT: lw $2, 16($5) +; MIPS32-NEXT: lw $3, 20($5) +; MIPS32-NEXT: lw $6, 24($5) +; MIPS32-NEXT: lw $7, 28($5) +; MIPS32-NEXT: sw $7, 28($4) +; MIPS32-NEXT: sw $6, 24($4) +; MIPS32-NEXT: sw $3, 20($4) +; MIPS32-NEXT: sw $2, 16($4) +; MIPS32-NEXT: sw $1, 12($4) +; MIPS32-NEXT: lw $1, 8($5) +; MIPS32-NEXT: sw $1, 8($4) +; MIPS32-NEXT: lw $1, 4($5) +; MIPS32-NEXT: sw $1, 4($4) +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 0($4) + %v = load <2 x i128>, ptr %p + ret <2 x i128> %v +} + +define void @call_v2i128(ptr %p) nounwind { +; MIPS64-LABEL: call_v2i128: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -64 +; MIPS64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $fp, 48($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 40($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $fp, $sp +; MIPS64-NEXT: daddiu $1, $zero, -32 +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: ld $7, 24($4) +; MIPS64-NEXT: ld $6, 16($4) +; MIPS64-NEXT: ld $5, 8($4) +; MIPS64-NEXT: ld $4, 0($4) +; MIPS64-NEXT: jal arg_v2i128 +; MIPS64-NEXT: and $sp, $sp, $1 +; MIPS64-NEXT: jal ret_v2i128 +; MIPS64-NEXT: daddiu $4, $sp, 0 +; MIPS64-NEXT: ld $1, 16($sp) +; MIPS64-NEXT: sd $1, 16($16) +; MIPS64-NEXT: ld $1, 24($sp) +; MIPS64-NEXT: sd $1, 24($16) +; MIPS64-NEXT: ld $1, 0($sp) +; MIPS64-NEXT: sd $1, 0($16) +; MIPS64-NEXT: ld $1, 8($sp) +; MIPS64-NEXT: sd $1, 8($16) +; MIPS64-NEXT: move $sp, $fp +; MIPS64-NEXT: ld $16, 40($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $fp, 48($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 64 +; +; MIPS32-LABEL: call_v2i128: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -96 +; MIPS32-NEXT: sw $ra, 92($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $fp, 88($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 84($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $fp, $sp +; MIPS32-NEXT: addiu $1, $zero, -32 +; MIPS32-NEXT: and $sp, $sp, $1 +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $7, 12($4) +; MIPS32-NEXT: lw $6, 8($4) +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: lw $1, 16($16) +; MIPS32-NEXT: lw $2, 20($16) +; MIPS32-NEXT: lw $3, 24($16) +; MIPS32-NEXT: lw $8, 28($16) +; MIPS32-NEXT: sw $8, 28($sp) +; MIPS32-NEXT: sw $3, 24($sp) +; MIPS32-NEXT: sw $2, 20($sp) +; MIPS32-NEXT: jal arg_v2i128 +; MIPS32-NEXT: sw $1, 16($sp) +; MIPS32-NEXT: jal ret_v2i128 +; MIPS32-NEXT: addiu $4, $sp, 32 +; MIPS32-NEXT: lw $1, 40($sp) +; MIPS32-NEXT: lw $2, 52($sp) +; MIPS32-NEXT: lw $3, 48($sp) +; MIPS32-NEXT: lw $4, 60($sp) +; MIPS32-NEXT: lw $5, 56($sp) +; MIPS32-NEXT: sw $5, 24($16) +; MIPS32-NEXT: sw $4, 
28($16) +; MIPS32-NEXT: sw $3, 16($16) +; MIPS32-NEXT: sw $2, 20($16) +; MIPS32-NEXT: sw $1, 8($16) +; MIPS32-NEXT: lw $1, 44($sp) +; MIPS32-NEXT: sw $1, 12($16) +; MIPS32-NEXT: lw $1, 32($sp) +; MIPS32-NEXT: sw $1, 0($16) +; MIPS32-NEXT: lw $1, 36($sp) +; MIPS32-NEXT: sw $1, 4($16) +; MIPS32-NEXT: move $sp, $fp +; MIPS32-NEXT: lw $16, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $fp, 88($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 92($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 96 + %v1 = load <2 x i128>, ptr %p + call void @arg_v2i128(<2 x i128> %v1) + %v2 = call <2 x i128> @ret_v2i128() + store <2 x i128> %v2, ptr %p + ret void +} + +define void @arg_v3i128(<3 x i128> %vec, ptr %p) { +; MIPS64-LABEL: arg_v3i128: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sd $9, 40($10) +; MIPS64-NEXT: sd $8, 32($10) +; MIPS64-NEXT: sd $7, 24($10) +; MIPS64-NEXT: sd $6, 16($10) +; MIPS64-NEXT: sd $5, 8($10) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $4, 0($10) +; +; MIPS32-LABEL: arg_v3i128: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 16($sp) +; MIPS32-NEXT: lw $2, 20($sp) +; MIPS32-NEXT: lw $3, 24($sp) +; MIPS32-NEXT: lw $8, 28($sp) +; MIPS32-NEXT: lw $9, 32($sp) +; MIPS32-NEXT: lw $10, 36($sp) +; MIPS32-NEXT: lw $11, 40($sp) +; MIPS32-NEXT: lw $12, 48($sp) +; MIPS32-NEXT: lw $13, 44($sp) +; MIPS32-NEXT: sw $13, 44($12) +; MIPS32-NEXT: sw $11, 40($12) +; MIPS32-NEXT: sw $10, 36($12) +; MIPS32-NEXT: sw $9, 32($12) +; MIPS32-NEXT: sw $8, 28($12) +; MIPS32-NEXT: sw $3, 24($12) +; MIPS32-NEXT: sw $2, 20($12) +; MIPS32-NEXT: sw $1, 16($12) +; MIPS32-NEXT: sw $7, 12($12) +; MIPS32-NEXT: sw $6, 8($12) +; MIPS32-NEXT: sw $5, 4($12) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $4, 0($12) + store <3 x i128> %vec, ptr %p + ret void +} + +define <3 x i128> @ret_v3i128(ptr %p) { +; MIPS64-LABEL: ret_v3i128: +; MIPS64: # %bb.0: +; MIPS64-NEXT: ld $1, 24($5) +; MIPS64-NEXT: ld $2, 32($5) +; MIPS64-NEXT: ld $3, 40($5) +; MIPS64-NEXT: sd $3, 40($4) +; MIPS64-NEXT: sd $2, 32($4) +; MIPS64-NEXT: sd $1, 24($4) +; MIPS64-NEXT: ld $1, 16($5) +; MIPS64-NEXT: sd $1, 16($4) +; MIPS64-NEXT: ld $1, 8($5) +; MIPS64-NEXT: sd $1, 8($4) +; MIPS64-NEXT: ld $1, 0($5) +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: sd $1, 0($4) +; +; MIPS32-LABEL: ret_v3i128: +; MIPS32: # %bb.0: +; MIPS32-NEXT: lw $1, 28($5) +; MIPS32-NEXT: lw $2, 32($5) +; MIPS32-NEXT: lw $3, 36($5) +; MIPS32-NEXT: lw $6, 40($5) +; MIPS32-NEXT: lw $7, 12($5) +; MIPS32-NEXT: lw $8, 16($5) +; MIPS32-NEXT: lw $9, 20($5) +; MIPS32-NEXT: lw $10, 24($5) +; MIPS32-NEXT: lw $11, 44($5) +; MIPS32-NEXT: sw $11, 44($4) +; MIPS32-NEXT: sw $6, 40($4) +; MIPS32-NEXT: sw $3, 36($4) +; MIPS32-NEXT: sw $2, 32($4) +; MIPS32-NEXT: sw $1, 28($4) +; MIPS32-NEXT: sw $10, 24($4) +; MIPS32-NEXT: sw $9, 20($4) +; MIPS32-NEXT: sw $8, 16($4) +; MIPS32-NEXT: sw $7, 12($4) +; MIPS32-NEXT: lw $1, 8($5) +; MIPS32-NEXT: sw $1, 8($4) +; MIPS32-NEXT: lw $1, 4($5) +; MIPS32-NEXT: sw $1, 4($4) +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sw $1, 0($4) + %v = load <3 x i128>, ptr %p + ret <3 x i128> %v +} + +define void @call_v3i128(ptr %p) nounwind { +; MIPS64-LABEL: call_v3i128: +; MIPS64: # %bb.0: +; MIPS64-NEXT: daddiu $sp, $sp, -128 +; MIPS64-NEXT: sd $ra, 120($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $fp, 112($sp) # 8-byte Folded Spill +; MIPS64-NEXT: sd $16, 104($sp) # 8-byte Folded Spill +; MIPS64-NEXT: move $fp, $sp +; MIPS64-NEXT: daddiu $1, $zero, -64 +; MIPS64-NEXT: move $16, $4 +; MIPS64-NEXT: ld $9, 40($4) +; MIPS64-NEXT: ld $8, 32($4) 
+; MIPS64-NEXT: ld $7, 24($4) +; MIPS64-NEXT: ld $6, 16($4) +; MIPS64-NEXT: ld $5, 8($4) +; MIPS64-NEXT: ld $4, 0($4) +; MIPS64-NEXT: jal arg_v3i128 +; MIPS64-NEXT: and $sp, $sp, $1 +; MIPS64-NEXT: jal ret_v3i128 +; MIPS64-NEXT: daddiu $4, $sp, 0 +; MIPS64-NEXT: ld $1, 16($sp) +; MIPS64-NEXT: ld $2, 40($sp) +; MIPS64-NEXT: ld $3, 32($sp) +; MIPS64-NEXT: sd $3, 32($16) +; MIPS64-NEXT: sd $2, 40($16) +; MIPS64-NEXT: sd $1, 16($16) +; MIPS64-NEXT: ld $1, 24($sp) +; MIPS64-NEXT: sd $1, 24($16) +; MIPS64-NEXT: ld $1, 0($sp) +; MIPS64-NEXT: sd $1, 0($16) +; MIPS64-NEXT: ld $1, 8($sp) +; MIPS64-NEXT: sd $1, 8($16) +; MIPS64-NEXT: move $sp, $fp +; MIPS64-NEXT: ld $16, 104($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $fp, 112($sp) # 8-byte Folded Reload +; MIPS64-NEXT: ld $ra, 120($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 128 +; +; MIPS32-LABEL: call_v3i128: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addiu $sp, $sp, -192 +; MIPS32-NEXT: sw $ra, 188($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $fp, 184($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $16, 180($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $fp, $sp +; MIPS32-NEXT: addiu $1, $zero, -64 +; MIPS32-NEXT: and $sp, $sp, $1 +; MIPS32-NEXT: move $16, $4 +; MIPS32-NEXT: lw $7, 12($4) +; MIPS32-NEXT: lw $6, 8($4) +; MIPS32-NEXT: lw $5, 4($4) +; MIPS32-NEXT: lw $4, 0($4) +; MIPS32-NEXT: lw $1, 16($16) +; MIPS32-NEXT: lw $2, 20($16) +; MIPS32-NEXT: lw $3, 24($16) +; MIPS32-NEXT: lw $8, 28($16) +; MIPS32-NEXT: lw $9, 32($16) +; MIPS32-NEXT: lw $10, 36($16) +; MIPS32-NEXT: lw $11, 40($16) +; MIPS32-NEXT: lw $12, 44($16) +; MIPS32-NEXT: sw $12, 44($sp) +; MIPS32-NEXT: sw $11, 40($sp) +; MIPS32-NEXT: sw $10, 36($sp) +; MIPS32-NEXT: sw $9, 32($sp) +; MIPS32-NEXT: sw $8, 28($sp) +; MIPS32-NEXT: sw $3, 24($sp) +; MIPS32-NEXT: sw $2, 20($sp) +; MIPS32-NEXT: jal arg_v3i128 +; MIPS32-NEXT: sw $1, 16($sp) +; MIPS32-NEXT: jal ret_v3i128 +; MIPS32-NEXT: addiu $4, $sp, 64 +; MIPS32-NEXT: lw $1, 88($sp) +; MIPS32-NEXT: lw $2, 100($sp) +; MIPS32-NEXT: lw $3, 96($sp) +; MIPS32-NEXT: lw $4, 108($sp) +; MIPS32-NEXT: lw $5, 64($sp) +; MIPS32-NEXT: lw $6, 84($sp) +; MIPS32-NEXT: lw $7, 80($sp) +; MIPS32-NEXT: lw $8, 92($sp) +; MIPS32-NEXT: lw $9, 104($sp) +; MIPS32-NEXT: sw $9, 40($16) +; MIPS32-NEXT: sw $4, 44($16) +; MIPS32-NEXT: sw $3, 32($16) +; MIPS32-NEXT: sw $2, 36($16) +; MIPS32-NEXT: sw $1, 24($16) +; MIPS32-NEXT: sw $8, 28($16) +; MIPS32-NEXT: sw $7, 16($16) +; MIPS32-NEXT: sw $6, 20($16) +; MIPS32-NEXT: sw $5, 0($16) +; MIPS32-NEXT: lw $1, 68($sp) +; MIPS32-NEXT: sw $1, 4($16) +; MIPS32-NEXT: lw $1, 72($sp) +; MIPS32-NEXT: sw $1, 8($16) +; MIPS32-NEXT: lw $1, 76($sp) +; MIPS32-NEXT: sw $1, 12($16) +; MIPS32-NEXT: move $sp, $fp +; MIPS32-NEXT: lw $16, 180($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $fp, 184($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $ra, 188($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 192 + %v1 = load <3 x i128>, ptr %p + call void @arg_v3i128(<3 x i128> %v1) + %v2 = call <3 x i128> @ret_v3i128() + store <3 x i128> %v2, ptr %p + ret void +} Index: llvm/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/test/CodeGen/Mips/cconv/vector.ll +++ llvm/test/CodeGen/Mips/cconv/vector.ll @@ -6563,24 +6563,12 @@ ; ; MIPS64-LABEL: i24x2: ; MIPS64: # %bb.0: # %Entry -; MIPS64-NEXT: lui $1, 256 -; MIPS64-NEXT: daddiu $1, $1, -1 -; MIPS64-NEXT: dsll $1, $1, 24 -; MIPS64-NEXT: and $2, $5, $1 -; MIPS64-NEXT: dsrl $2, 
$2, 24 -; MIPS64-NEXT: sll $2, $2, 0 -; MIPS64-NEXT: and $1, $4, $1 -; MIPS64-NEXT: dsrl $1, $1, 24 -; MIPS64-NEXT: sll $1, $1, 0 -; MIPS64-NEXT: addu $1, $1, $2 -; MIPS64-NEXT: sll $2, $5, 0 -; MIPS64-NEXT: sll $3, $4, 0 -; MIPS64-NEXT: dsll $1, $1, 24 -; MIPS64-NEXT: addu $2, $3, $2 -; MIPS64-NEXT: lui $3, 255 -; MIPS64-NEXT: ori $3, $3, 65535 -; MIPS64-NEXT: and $2, $2, $3 -; MIPS64-NEXT: or $2, $2, $1 +; MIPS64-NEXT: sll $1, $6, 0 +; MIPS64-NEXT: sll $2, $4, 0 +; MIPS64-NEXT: addu $2, $2, $1 +; MIPS64-NEXT: sll $1, $7, 0 +; MIPS64-NEXT: sll $3, $5, 0 +; MIPS64-NEXT: addu $3, $3, $1 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop ; @@ -6615,56 +6603,14 @@ ; ; MIPS64R5EB-LABEL: i24x2: ; MIPS64R5EB: # %bb.0: # %Entry -; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sh $5, 20($sp) -; MIPS64R5EB-NEXT: dsrl $1, $5, 16 -; MIPS64R5EB-NEXT: sw $1, 16($sp) -; MIPS64R5EB-NEXT: sh $4, 28($sp) -; MIPS64R5EB-NEXT: dsrl $1, $4, 16 -; MIPS64R5EB-NEXT: sw $1, 24($sp) -; MIPS64R5EB-NEXT: lbu $1, 20($sp) -; MIPS64R5EB-NEXT: dsll $1, $1, 8 -; MIPS64R5EB-NEXT: lb $2, 19($sp) -; MIPS64R5EB-NEXT: dsll $2, $2, 16 -; MIPS64R5EB-NEXT: or $1, $2, $1 -; MIPS64R5EB-NEXT: lbu $2, 28($sp) -; MIPS64R5EB-NEXT: dsll $2, $2, 8 -; MIPS64R5EB-NEXT: lb $3, 27($sp) -; MIPS64R5EB-NEXT: dsll $3, $3, 16 -; MIPS64R5EB-NEXT: lbu $4, 21($sp) -; MIPS64R5EB-NEXT: or $2, $3, $2 -; MIPS64R5EB-NEXT: or $1, $4, $1 -; MIPS64R5EB-NEXT: lh $3, 16($sp) -; MIPS64R5EB-NEXT: dsll $3, $3, 8 -; MIPS64R5EB-NEXT: lbu $4, 18($sp) -; MIPS64R5EB-NEXT: or $3, $4, $3 -; MIPS64R5EB-NEXT: lbu $4, 29($sp) -; MIPS64R5EB-NEXT: insert.d $w0[0], $3 -; MIPS64R5EB-NEXT: insert.d $w0[1], $1 -; MIPS64R5EB-NEXT: or $1, $4, $2 -; MIPS64R5EB-NEXT: lh $2, 24($sp) -; MIPS64R5EB-NEXT: dsll $2, $2, 8 -; MIPS64R5EB-NEXT: lbu $3, 26($sp) -; MIPS64R5EB-NEXT: or $2, $3, $2 -; MIPS64R5EB-NEXT: insert.d $w1[0], $2 -; MIPS64R5EB-NEXT: insert.d $w1[1], $1 +; MIPS64R5EB-NEXT: insert.d $w0[0], $6 +; MIPS64R5EB-NEXT: insert.d $w0[1], $7 +; MIPS64R5EB-NEXT: insert.d $w1[0], $4 +; MIPS64R5EB-NEXT: insert.d $w1[1], $5 ; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS64R5EB-NEXT: copy_s.d $1, $w0[1] -; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] -; MIPS64R5EB-NEXT: sb $2, 10($sp) -; MIPS64R5EB-NEXT: dsrl $3, $1, 16 -; MIPS64R5EB-NEXT: sb $3, 11($sp) -; MIPS64R5EB-NEXT: dsrl $2, $2, 8 -; MIPS64R5EB-NEXT: sh $2, 8($sp) -; MIPS64R5EB-NEXT: sb $1, 13($sp) -; MIPS64R5EB-NEXT: dsrl $1, $1, 8 -; MIPS64R5EB-NEXT: sb $1, 12($sp) -; MIPS64R5EB-NEXT: lw $1, 8($sp) -; MIPS64R5EB-NEXT: dsll $1, $1, 16 -; MIPS64R5EB-NEXT: lhu $2, 12($sp) -; MIPS64R5EB-NEXT: or $2, $2, $1 -; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 +; MIPS64R5EB-NEXT: copy_s.w $2, $w0[1] +; MIPS64R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -6698,56 +6644,13 @@ ; ; MIPS64R5EL-LABEL: i24x2: ; MIPS64R5EL: # %bb.0: # %Entry -; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 -; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EL-NEXT: dsrl $1, $5, 32 -; MIPS64R5EL-NEXT: sh $1, 20($sp) -; MIPS64R5EL-NEXT: sw $5, 16($sp) -; MIPS64R5EL-NEXT: dsrl $1, $4, 32 -; MIPS64R5EL-NEXT: sh $1, 28($sp) -; MIPS64R5EL-NEXT: lbu $1, 20($sp) -; MIPS64R5EL-NEXT: sw $4, 24($sp) -; MIPS64R5EL-NEXT: dsll $1, $1, 8 -; MIPS64R5EL-NEXT: lbu $2, 19($sp) -; MIPS64R5EL-NEXT: or $1, $1, $2 -; MIPS64R5EL-NEXT: lb $2, 21($sp) -; MIPS64R5EL-NEXT: dsll $2, $2, 16 -; MIPS64R5EL-NEXT: lbu $3, 28($sp) -; MIPS64R5EL-NEXT: dsll $3, $3, 8 -; MIPS64R5EL-NEXT: lb $4, 
-; MIPS64R5EL-NEXT: lbu $5, 27($sp)
-; MIPS64R5EL-NEXT: or $3, $3, $5
-; MIPS64R5EL-NEXT: or $1, $1, $2
-; MIPS64R5EL-NEXT: dsll $2, $4, 16
-; MIPS64R5EL-NEXT: lhu $4, 16($sp)
-; MIPS64R5EL-NEXT: or $2, $4, $2
-; MIPS64R5EL-NEXT: lb $4, 29($sp)
-; MIPS64R5EL-NEXT: dsll $4, $4, 16
-; MIPS64R5EL-NEXT: insert.d $w0[0], $2
-; MIPS64R5EL-NEXT: insert.d $w0[1], $1
-; MIPS64R5EL-NEXT: or $1, $3, $4
-; MIPS64R5EL-NEXT: lb $2, 26($sp)
-; MIPS64R5EL-NEXT: dsll $2, $2, 16
-; MIPS64R5EL-NEXT: lhu $3, 24($sp)
-; MIPS64R5EL-NEXT: or $2, $3, $2
-; MIPS64R5EL-NEXT: insert.d $w1[0], $2
-; MIPS64R5EL-NEXT: insert.d $w1[1], $1
+; MIPS64R5EL-NEXT: insert.d $w0[0], $6
+; MIPS64R5EL-NEXT: insert.d $w0[1], $7
+; MIPS64R5EL-NEXT: insert.d $w1[0], $4
+; MIPS64R5EL-NEXT: insert.d $w1[1], $5
 ; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
-; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: dsrl $3, $2, 8
-; MIPS64R5EL-NEXT: sb $3, 12($sp)
-; MIPS64R5EL-NEXT: dsrl $3, $2, 16
-; MIPS64R5EL-NEXT: sb $3, 13($sp)
-; MIPS64R5EL-NEXT: sb $2, 11($sp)
-; MIPS64R5EL-NEXT: sh $1, 8($sp)
-; MIPS64R5EL-NEXT: dsrl $1, $1, 16
-; MIPS64R5EL-NEXT: sb $1, 10($sp)
-; MIPS64R5EL-NEXT: lh $1, 12($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 32
-; MIPS64R5EL-NEXT: lwu $2, 8($sp)
-; MIPS64R5EL-NEXT: or $2, $2, $1
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
+; MIPS64R5EL-NEXT: copy_s.w $2, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.w $3, $w0[2]
 ; MIPS64R5EL-NEXT: jr $ra
 ; MIPS64R5EL-NEXT: nop
 Entry:
@@ -6794,17 +6697,22 @@
 ; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64EB-NEXT: daddu $1, $1, $25
 ; MIPS64EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64EB-NEXT: lui $1, 1536
-; MIPS64EB-NEXT: ori $4, $1, 7
-; MIPS64EB-NEXT: lui $1, 3072
-; MIPS64EB-NEXT: ori $5, $1, 8
 ; MIPS64EB-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64EB-NEXT: daddiu $4, $zero, 6
+; MIPS64EB-NEXT: daddiu $5, $zero, 7
+; MIPS64EB-NEXT: daddiu $6, $zero, 12
+; MIPS64EB-NEXT: daddiu $7, $zero, 8
 ; MIPS64EB-NEXT: jalr $25
 ; MIPS64EB-NEXT: nop
 ; MIPS64EB-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64EB-NEXT: sh $2, 4($1)
-; MIPS64EB-NEXT: dsrl $2, $2, 16
-; MIPS64EB-NEXT: sw $2, 0($1)
+; MIPS64EB-NEXT: sb $3, 5($1)
+; MIPS64EB-NEXT: sb $2, 2($1)
+; MIPS64EB-NEXT: srl $4, $3, 8
+; MIPS64EB-NEXT: sb $4, 4($1)
+; MIPS64EB-NEXT: srl $3, $3, 16
+; MIPS64EB-NEXT: sb $3, 3($1)
+; MIPS64EB-NEXT: srl $2, $2, 8
+; MIPS64EB-NEXT: sh $2, 0($1)
 ; MIPS64EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
 ; MIPS64EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
 ; MIPS64EB-NEXT: daddiu $sp, $sp, 16
@@ -6849,31 +6757,27 @@
 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64R5EB-NEXT: daddu $1, $1, $25
 ; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64R5EB-NEXT: lui $1, 1536
-; MIPS64R5EB-NEXT: ori $1, $1, 7
-; MIPS64R5EB-NEXT: swl $1, 2($sp)
-; MIPS64R5EB-NEXT: lui $2, 3072
-; MIPS64R5EB-NEXT: ori $2, $2, 8
-; MIPS64R5EB-NEXT: swl $2, 10($sp)
-; MIPS64R5EB-NEXT: sh $zero, 0($sp)
-; MIPS64R5EB-NEXT: swr $1, 5($sp)
-; MIPS64R5EB-NEXT: sh $zero, 8($sp)
-; MIPS64R5EB-NEXT: swr $2, 13($sp)
-; MIPS64R5EB-NEXT: lw $1, 0($sp)
-; MIPS64R5EB-NEXT: dsll $1, $1, 16
-; MIPS64R5EB-NEXT: lhu $2, 4($sp)
-; MIPS64R5EB-NEXT: or $4, $2, $1
-; MIPS64R5EB-NEXT: lw $1, 8($sp)
-; MIPS64R5EB-NEXT: dsll $1, $1, 16
-; MIPS64R5EB-NEXT: lhu $2, 12($sp)
-; MIPS64R5EB-NEXT: or $5, $2, $1
 ; MIPS64R5EB-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64R5EB-NEXT: daddiu $4, $zero, 6
+; MIPS64R5EB-NEXT: daddiu $5, $zero, 7
+; MIPS64R5EB-NEXT: daddiu $6, $zero, 12
+; MIPS64R5EB-NEXT: daddiu $7, $zero, 8
 ; MIPS64R5EB-NEXT: jalr $25
 ; MIPS64R5EB-NEXT: nop
-; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64R5EB-NEXT: sh $2, 4($1)
+; MIPS64R5EB-NEXT: sw $3, 12($sp)
+; MIPS64R5EB-NEXT: sw $2, 4($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
+; MIPS64R5EB-NEXT: ld $3, %got_disp(gv2i24)($gp)
+; MIPS64R5EB-NEXT: sb $2, 5($3)
+; MIPS64R5EB-NEXT: sb $1, 2($3)
+; MIPS64R5EB-NEXT: dsrl $4, $2, 8
+; MIPS64R5EB-NEXT: sb $4, 4($3)
 ; MIPS64R5EB-NEXT: dsrl $2, $2, 16
-; MIPS64R5EB-NEXT: sw $2, 0($1)
+; MIPS64R5EB-NEXT: sb $2, 3($3)
+; MIPS64R5EB-NEXT: dsrl $1, $1, 8
+; MIPS64R5EB-NEXT: sh $1, 0($3)
 ; MIPS64R5EB-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
 ; MIPS64R5EB-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
 ; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
@@ -6918,17 +6822,22 @@
 ; MIPS64EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64EL-NEXT: daddu $1, $1, $25
 ; MIPS64EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64EL-NEXT: lui $1, 1792
-; MIPS64EL-NEXT: ori $4, $1, 6
-; MIPS64EL-NEXT: lui $1, 2048
-; MIPS64EL-NEXT: ori $5, $1, 12
 ; MIPS64EL-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64EL-NEXT: daddiu $4, $zero, 6
+; MIPS64EL-NEXT: daddiu $5, $zero, 7
+; MIPS64EL-NEXT: daddiu $6, $zero, 12
+; MIPS64EL-NEXT: daddiu $7, $zero, 8
 ; MIPS64EL-NEXT: jalr $25
 ; MIPS64EL-NEXT: nop
 ; MIPS64EL-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64EL-NEXT: sw $2, 0($1)
-; MIPS64EL-NEXT: dsrl $2, $2, 32
-; MIPS64EL-NEXT: sh $2, 4($1)
+; MIPS64EL-NEXT: sb $3, 3($1)
+; MIPS64EL-NEXT: sh $2, 0($1)
+; MIPS64EL-NEXT: srl $4, $3, 8
+; MIPS64EL-NEXT: sb $4, 4($1)
+; MIPS64EL-NEXT: srl $3, $3, 16
+; MIPS64EL-NEXT: sb $3, 5($1)
+; MIPS64EL-NEXT: srl $2, $2, 16
+; MIPS64EL-NEXT: sb $2, 2($1)
 ; MIPS64EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
 ; MIPS64EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
 ; MIPS64EL-NEXT: daddiu $sp, $sp, 16
@@ -6973,31 +6882,27 @@
 ; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i24x2)))
 ; MIPS64R5EL-NEXT: daddu $1, $1, $25
 ; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i24x2)))
-; MIPS64R5EL-NEXT: addiu $1, $zero, 1792
-; MIPS64R5EL-NEXT: swl $1, 5($sp)
-; MIPS64R5EL-NEXT: addiu $2, $zero, 2048
-; MIPS64R5EL-NEXT: swl $2, 13($sp)
-; MIPS64R5EL-NEXT: swr $1, 2($sp)
-; MIPS64R5EL-NEXT: daddiu $1, $zero, 6
-; MIPS64R5EL-NEXT: sh $1, 0($sp)
-; MIPS64R5EL-NEXT: swr $2, 10($sp)
-; MIPS64R5EL-NEXT: daddiu $1, $zero, 12
-; MIPS64R5EL-NEXT: sh $1, 8($sp)
-; MIPS64R5EL-NEXT: lh $1, 4($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 32
-; MIPS64R5EL-NEXT: lwu $2, 0($sp)
-; MIPS64R5EL-NEXT: or $4, $2, $1
-; MIPS64R5EL-NEXT: lh $1, 12($sp)
-; MIPS64R5EL-NEXT: dsll $1, $1, 32
-; MIPS64R5EL-NEXT: lwu $2, 8($sp)
-; MIPS64R5EL-NEXT: or $5, $2, $1
 ; MIPS64R5EL-NEXT: ld $25, %call16(i24x2)($gp)
+; MIPS64R5EL-NEXT: daddiu $4, $zero, 6
+; MIPS64R5EL-NEXT: daddiu $5, $zero, 7
+; MIPS64R5EL-NEXT: daddiu $6, $zero, 12
+; MIPS64R5EL-NEXT: daddiu $7, $zero, 8
 ; MIPS64R5EL-NEXT: jalr $25
 ; MIPS64R5EL-NEXT: nop
-; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2i24)($gp)
-; MIPS64R5EL-NEXT: sw $2, 0($1)
-; MIPS64R5EL-NEXT: dsrl $2, $2, 32
-; MIPS64R5EL-NEXT: sh $2, 4($1)
+; MIPS64R5EL-NEXT: sw $3, 8($sp)
+; MIPS64R5EL-NEXT: sw $2, 0($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
+; MIPS64R5EL-NEXT: ld $3, %got_disp(gv2i24)($gp)
+; MIPS64R5EL-NEXT: sb $2, 3($3)
+; MIPS64R5EL-NEXT: sh $1, 0($3)
+; MIPS64R5EL-NEXT: dsrl $4, $2, 8
+; MIPS64R5EL-NEXT: sb $4, 4($3)
+; MIPS64R5EL-NEXT: dsrl $2, $2, 16
+; MIPS64R5EL-NEXT: sb $2, 5($3)
+; MIPS64R5EL-NEXT: dsrl $1, $1, 16
+; MIPS64R5EL-NEXT: sb $1, 2($3)
 ; MIPS64R5EL-NEXT: ld $gp, 16($sp) # 8-byte Folded Reload
 ; MIPS64R5EL-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
 ; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32